|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED | 
|  |  | 
|  | ; These CPUs should have the fix enabled by default. They use different | 
|  | ; FileCheck prefixes because some instructions are scheduled differently. | 
|  | ; | 
|  | ; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX | 
|  | ; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX | 
|  |  | 
; This checks that adding `+fix-cortex-a57-aes-1742098` causes a `vorr` to be
; inserted wherever the compiler cannot prove that each input to the first aes
; instruction in a fused aes pair was set by a 64-bit or 128-bit Neon register
; write. All other register writes are unsafe, and require a `vorr` to protect
; the AES input.
|  |  | 
|  | declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) | 
|  | declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>) | 
|  | declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) | 
|  | declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>) | 
|  |  | 
|  | declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr | 
|  | declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr | 
|  | declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr | 
|  |  | 
|  |  | 
|  |  | 
; NOTE(review): this file is autogenerated — the CHECK lines below are owned by
; utils/update_llc_test_checks.py; only the plain comments are hand-written.
; aese_zero: both aese inputs come from full Neon writes (vmov.i32 q9, #0x0 and
; the vld1.64 load), so the checks record no protective vorr before the
; aese/aesmc pair.
define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aese_zero:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vmov.i32 q9, #0x0
; CHECK-FIX-NEXT:    aese.8 q9, q8
; CHECK-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
%2 = load <16 x i8>, <16 x i8>* %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
store <16 x i8> %4, <16 x i8>* %0, align 8
ret void
}
|  |  | 
; aese_via_call1: one aese input (q0) is the return value of an opaque call, so
; the compiler cannot see how it was written; the checks record a `vorr q0, q0,
; q0` inserted after the call to protect that input.
define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aese_via_call1:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_input
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aese.8 q0, q8
; CHECK-FIX-NEXT:    aesmc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
%3 = load <16 x i8>, <16 x i8>* %0, align 8
%4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
%5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
store <16 x i8> %5, <16 x i8>* %0, align 8
ret void
}
|  |  | 
; aese_via_call2: same as aese_via_call1 but the callee takes a half argument;
; the call-returned q0 still gets a protective vorr before the aese.
define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call2:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_inputf16
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aese.8 q0, q8
; CHECK-FIX-NEXT:    aesmc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
%4 = load <16 x i8>, <16 x i8>* %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; aese_via_call3: same as aese_via_call1 but the callee takes a float argument;
; the call-returned q0 still gets a protective vorr before the aese.
define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call3:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_inputf32
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aese.8 q0, q8
; CHECK-FIX-NEXT:    aesmc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
%4 = load <16 x i8>, <16 x i8>* %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; aese_once_via_ptr: both aese inputs are produced by vld1.64 loads (64-bit
; d-register writes), so the checks record no protective vorr.
define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT:    aese.8 q9, q8
; CHECK-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%3 = load <16 x i8>, <16 x i8>* %1, align 8
%4 = load <16 x i8>, <16 x i8>* %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; aese_once_via_val: both aese inputs arrive in argument registers (q0, q1),
; whose producers are invisible to the compiler, so the checks record a
; protective vorr on each of them.
define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aese.8 q1, q0
; CHECK-FIX-NEXT:    aesmc.8 q0, q1
; CHECK-FIX-NEXT:    bx lr
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
ret <16 x i8> %4
}
|  |  | 
; aese_twice_via_ptr: two fused aese/aesmc pairs; every input is produced by a
; vld1.64 load or a previous aesmc, so the checks record no protective vorr for
; either pair.
define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT:    aese.8 q9, q8
; CHECK-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT:    aese.8 q8, q9
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%3 = load <16 x i8>, <16 x i8>* %1, align 8
%4 = load <16 x i8>, <16 x i8>* %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
%7 = load <16 x i8>, <16 x i8>* %0, align 8
%8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
store <16 x i8> %9, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; aese_twice_via_val: both argument registers are protected with vorr before the
; first aese. NOTE(review): the checks record `vorr q0, q0, q0` twice in a row
; (once per aese that consumes q0, presumably) — this is what the autogenerated
; output captured; confirm against the pass if it looks redundant.
define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aese.8 q1, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q0, q8
; CHECK-FIX-NEXT:    bx lr
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
%5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
%6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
ret <16 x i8> %6
}
|  |  | 
; aese_loop_via_ptr: the aese inputs are re-loaded with vld1.64 on every loop
; iteration, so no vorr is needed anywhere. The two CPU-specific prefixes only
; differ in instruction scheduling (position of the `subs`), hence the separate
; CHECK-FIX-NOSCHED and CHECK-CORTEX-FIX bodies.
define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB8_1
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB8_1
; CHECK-CORTEX-FIX-NEXT:  @ %bb.2:
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = icmp eq i32 %0, 0
br i1 %4, label %5, label %6

5:
ret void

6:
%7 = phi i32 [ %12, %6 ], [ 0, %3 ]
%8 = load <16 x i8>, <16 x i8>* %2, align 8
%9 = load <16 x i8>, <16 x i8>* %1, align 8
%10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
store <16 x i8> %11, <16 x i8>* %2, align 8
%12 = add nuw i32 %7, 1
%13 = icmp eq i32 %12, %0
br i1 %13, label %5, label %6
}
|  |  | 
; aese_loop_via_val: the aese inputs are the argument registers q0/q1, so each
; gets a protective vorr — and the checks record both vorrs hoisted out of the
; loop, before .LBB9_1, rather than repeated per iteration.
define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aese_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB9_2
; CHECK-FIX-NEXT:  .LBB9_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q1, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q1, q1
; CHECK-FIX-NEXT:    bne .LBB9_1
; CHECK-FIX-NEXT:  .LBB9_2:
; CHECK-FIX-NEXT:    vorr q0, q1, q1
; CHECK-FIX-NEXT:    bx lr
%4 = icmp eq i32 %0, 0
br i1 %4, label %5, label %7

5:
%6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
ret <16 x i8> %6

7:
%8 = phi i32 [ %12, %7 ], [ 0, %3 ]
%9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
%10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
%11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
%12 = add nuw i32 %8, 1
%13 = icmp eq i32 %12, %0
br i1 %13, label %5, label %7
}
|  |  | 
; aese_set8_via_ptr: a byte is loaded to a GPR and inserted into lane 0 of both
; aese inputs via vmov.8. The incoming argument q0 is protected with vorr at
; entry; q8 is built from a vld1.64 load. The two CPU prefixes differ only in
; the relative order of the ldrb and vld1.64.
define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = load i8, i8* %0, align 1
%5 = load <16 x i8>, <16 x i8>* %2, align 8
%6 = insertelement <16 x i8> %5, i8 %4, i64 0
%7 = insertelement <16 x i8> %1, i8 %4, i64 0
%8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
store <16 x i8> %9, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; aese_set8_via_val: like aese_set8_via_ptr, but the byte arrives in a GPR
; argument instead of through a pointer; q0 is protected with vorr at entry and
; both lanes are set with vmov.8 before the aese.
define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set8_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.8 d0[0], r0
; CHECK-FIX-NEXT:    vmov.8 d16[0], r0
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%4 = load <16 x i8>, <16 x i8>* %2, align 8
%5 = insertelement <16 x i8> %4, i8 %0, i64 0
%6 = insertelement <16 x i8> %1, i8 %0, i64 0
%7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
%8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
store <16 x i8> %8, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; aese_set8_cond_via_ptr: the lane-0 byte insert happens only on a condition, so
; the checks trace the diamond control flow; q8 is set by vld1.64 (optionally
; refined by a vld1.8 lane load), while the incoming argument q0 is protected
; with vorr at entry.
define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB12_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB12_3
; CHECK-FIX-NEXT:    b .LBB12_4
; CHECK-FIX-NEXT:  .LBB12_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB12_4
; CHECK-FIX-NEXT:  .LBB12_3:
; CHECK-FIX-NEXT:    vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT:  .LBB12_4:
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
br i1 %0, label %5, label %9

5:
%6 = load i8, i8* %1, align 1
%7 = load <16 x i8>, <16 x i8>* %3, align 8
%8 = insertelement <16 x i8> %7, i8 %6, i64 0
br label %11

9:
%10 = load <16 x i8>, <16 x i8>* %3, align 8
br label %11

11:
%12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
br i1 %0, label %13, label %16

13:
%14 = load i8, i8* %1, align 1
%15 = insertelement <16 x i8> %2, i8 %14, i64 0
br label %16

16:
%17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; aese_set8_cond_via_val: conditional lane-0 insert expressed as selects in the
; IR; the checks show each select lowered to a compare-and-branch around a
; vmov.8, with the argument q0 protected by a single vorr at entry.
define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB13_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
; CHECK-FIX-NEXT:  .LBB13_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB13_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:  .LBB13_4: @ %select.end2
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = load <16 x i8>, <16 x i8>* %3, align 8
%6 = insertelement <16 x i8> %5, i8 %1, i64 0
%7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
%8 = insertelement <16 x i8> %2, i8 %1, i64 0
%9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
%10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
store <16 x i8> %11, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; aese_set8_loop_via_ptr: the byte insert into q0 happens once before the loop
; (vmov.8 d0[0] in .LBB14_1); the argument q0 gets its protective vorr at
; function entry, and the loop itself contains only the aese/aesmc pair.
define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrb r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strb r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB14_1:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB14_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB14_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = load i8, i8* %1, align 1
%6 = insertelement <16 x i8> %2, i8 %5, i64 0
%7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
store i8 %5, i8* %7, align 8
%8 = icmp eq i32 %0, 0
br i1 %8, label %12, label %9

9:
%10 = load <16 x i8>, <16 x i8>* %3, align 8
br label %13

11:
store <16 x i8> %17, <16 x i8>* %3, align 8
br label %12

12:
ret void

13:
%14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
%15 = phi i32 [ 0, %9 ], [ %18, %13 ]
%16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
%17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
%18 = add nuw i32 %15, 1
%19 = icmp eq i32 %18, %0
br i1 %19, label %11, label %13
}
|  |  | 
; aese_set8_loop_via_val: the IR re-inserts the byte into the accumulator every
; iteration, so the loop body contains a vmov.8 d16[0] before each aese; the
; argument q0 is protected by a single vorr at entry.
define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB15_1:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB15_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB15_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %10, label %6

6:
%7 = insertelement <16 x i8> %2, i8 %1, i64 0
%8 = load <16 x i8>, <16 x i8>* %3, align 8
br label %11

9:
store <16 x i8> %16, <16 x i8>* %3, align 8
br label %10

10:
ret void

11:
%12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
%13 = phi i32 [ 0, %6 ], [ %17, %11 ]
%14 = insertelement <16 x i8> %12, i8 %1, i64 0
%15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
%16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
%17 = add nuw i32 %13, 1
%18 = icmp eq i32 %17, %0
br i1 %18, label %9, label %11
}
|  |  | 
; aese_set16_via_ptr: 16-bit variant of aese_set8_via_ptr — a halfword is loaded
; to a GPR and inserted into lane 0 of both inputs via vmov.16; the argument q0
; is protected with vorr at entry. Prefixes differ only in load scheduling.
define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = load i16, i16* %0, align 2
%5 = bitcast <16 x i8>* %2 to <8 x i16>*
%6 = load <8 x i16>, <8 x i16>* %5, align 8
%7 = insertelement <8 x i16> %6, i16 %4, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %4, i64 0
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; aese_set16_via_val: 16-bit variant of aese_set8_via_val — the halfword arrives
; in a GPR argument and is lane-inserted with vmov.16; q0 is protected with vorr
; at entry.
define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set16_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%4 = bitcast <16 x i8>* %2 to <8 x i16>*
%5 = load <8 x i16>, <8 x i16>* %4, align 8
%6 = insertelement <8 x i16> %5, i16 %0, i64 0
%7 = bitcast <8 x i16> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <8 x i16>
%9 = insertelement <8 x i16> %8, i16 %0, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; aese_set16_cond_via_ptr: 16-bit variant of aese_set8_cond_via_ptr — the
; conditional lane insert is lowered to vld1.16 lane loads (with the :16
; alignment hint) in a branch diamond; the argument q0 is protected with vorr at
; entry.
define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB18_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB18_3
; CHECK-FIX-NEXT:    b .LBB18_4
; CHECK-FIX-NEXT:  .LBB18_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB18_4
; CHECK-FIX-NEXT:  .LBB18_3:
; CHECK-FIX-NEXT:    vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT:  .LBB18_4:
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
br i1 %0, label %5, label %10

5:
%6 = load i16, i16* %1, align 2
%7 = bitcast <16 x i8>* %3 to <8 x i16>*
%8 = load <8 x i16>, <8 x i16>* %7, align 8
%9 = insertelement <8 x i16> %8, i16 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = load <8 x i16>, <8 x i16>* %11, align 8
br label %13

13:
%14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i16, i16* %1, align 2
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = insertelement <8 x i16> %17, i16 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <8 x i16>
br label %21

21:
%22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <8 x i16> %14 to <16 x i8>
%24 = bitcast <8 x i16> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; aese_set16_cond_via_val: 16-bit variant of aese_set8_cond_via_val — each IR
; select becomes a compare-and-branch around a vmov.16 lane insert; the argument
; q0 is protected with a single vorr at entry.
define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB19_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:  .LBB19_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB19_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:  .LBB19_4: @ %select.end2
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = bitcast <16 x i8>* %3 to <8 x i16>*
%6 = load <8 x i16>, <8 x i16>* %5, align 8
%7 = insertelement <8 x i16> %6, i16 %1, i64 0
%8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
%9 = bitcast <16 x i8> %2 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %1, i64 0
%11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
%12 = bitcast <8 x i16> %8 to <16 x i8>
%13 = bitcast <8 x i16> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; aese_set16_loop_via_ptr: 16-bit variant of aese_set8_loop_via_ptr — the
; halfword is inserted into q0 once before the loop (vmov.16 in .LBB20_1); the
; argument q0 is protected with vorr at entry and the loop body is just the
; aese/aesmc pair.
define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrh r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strh r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB20_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB20_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB20_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = load i16, i16* %1, align 2
%6 = bitcast <16 x i8> %2 to <8 x i16>
%7 = insertelement <8 x i16> %6, i16 %5, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i16*
store i16 %5, i16* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
; aese_set16_loop_via_val: 16-bit variant of aese_set8_loop_via_val — the IR
; reloads and re-inserts the halfword every iteration, so the loop body contains
; a vld1.64 plus a vmov.16 d16[0] before each aese; the argument q0 is protected
; by a single vorr at entry.
define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB21_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:  .LBB21_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bne .LBB21_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %1, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <8 x i16>*
%11 = bitcast <16 x i8>* %3 to i16*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <8 x i16>, <8 x i16>* %10, align 8
%16 = insertelement <8 x i16> %15, i16 %1, i64 0
%17 = bitcast <8 x i16> %16 to <16 x i8>
store i16 %1, i16* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
; An i32 loaded from memory is moved through a GPR (`ldr` + `vmov.32`) into
; lane 0 of both AES operands. The lane write is a partial register update, so
; the checks show `vorr q0, q0, q0` protecting the vector argument. The two
; prefixes differ only in instruction scheduling.
define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = load i32, i32* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x i32>*
%6 = load <4 x i32>, <4 x i32>* %5, align 8
%7 = insertelement <4 x i32> %6, i32 %4, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x i32>
%10 = insertelement <4 x i32> %9, i32 %4, i64 0
%11 = bitcast <4 x i32> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Same as aese_set32_via_ptr but the i32 arrives by value in r0; the GPR lane
; insert (`vmov.32`) taints both AES operands, so `vorr q0, q0, q0` is emitted.
define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set32_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%4 = bitcast <16 x i8>* %2 to <4 x i32>*
%5 = load <4 x i32>, <4 x i32>* %4, align 8
%6 = insertelement <4 x i32> %5, i32 %0, i64 0
%7 = bitcast <4 x i32> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x i32>
%9 = insertelement <4 x i32> %8, i32 %0, i64 0
%10 = bitcast <4 x i32> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Both AES operands may (conditionally, on %0) have an i32 lane inserted from
; memory. Because the lane write only happens on one path, the compiler cannot
; prove the operand safe and the checks show `vorr q0, q0, q0` at entry.
define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB24_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB24_3
; CHECK-FIX-NEXT:    b .LBB24_4
; CHECK-FIX-NEXT:  .LBB24_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB24_4
; CHECK-FIX-NEXT:  .LBB24_3:
; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT:  .LBB24_4:
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
br i1 %0, label %5, label %10

; First diamond: conditionally insert the loaded i32 into the state operand.
5:
%6 = load i32, i32* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x i32>*
%8 = load <4 x i32>, <4 x i32>* %7, align 8
%9 = insertelement <4 x i32> %8, i32 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <4 x i32>*
%12 = load <4 x i32>, <4 x i32>* %11, align 8
br label %13

13:
%14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

; Second diamond: conditionally insert the loaded i32 into the key operand.
15:
%16 = load i32, i32* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x i32>
%18 = insertelement <4 x i32> %17, i32 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <4 x i32>
br label %21

21:
%22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x i32> %14 to <16 x i8>
%24 = bitcast <4 x i32> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; The select-based form of the conditional i32 insert: each operand's lane
; write lowers to a predicated `vmov.32` from a GPR, so the vector argument is
; protected with `vorr q0, q0, q0` at entry.
define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB25_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
; CHECK-FIX-NEXT:  .LBB25_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB25_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:  .LBB25_4: @ %select.end2
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = bitcast <16 x i8>* %3 to <4 x i32>*
%6 = load <4 x i32>, <4 x i32>* %5, align 8
%7 = insertelement <4 x i32> %6, i32 %1, i64 0
%8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
%9 = bitcast <16 x i8> %2 to <4 x i32>
%10 = insertelement <4 x i32> %9, i32 %1, i64 0
%11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
%12 = bitcast <4 x i32> %8 to <16 x i8>
%13 = bitcast <4 x i32> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; The i32 is loaded once and inserted into the loop-invariant key operand
; through a GPR (`vmov.32 d0[0], r1`); the partial write taints q0, so the fix
; emits `vorr q0, q0, q0` at entry before the aese/aesmc loop.
define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldr r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    str r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB26_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB26_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB26_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = load i32, i32* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x i32>
%7 = insertelement <4 x i32> %6, i32 %5, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i32*
store i32 %5, i32* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

; Loop: chain the state through the fused aese/aesmc pair %0 times.
15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
; Like aese_set32_loop_via_ptr but the i32 arrives by value and is re-inserted
; into the state operand on every iteration; both operands see GPR lane
; writes, and q0 is protected with a `vorr` at entry.
define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB27_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:  .LBB27_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bne .LBB27_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

; Preheader: build the loop-invariant key operand.
6:
%7 = bitcast <16 x i8> %2 to <4 x i32>
%8 = insertelement <4 x i32> %7, i32 %1, i64 0
%9 = bitcast <4 x i32> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <4 x i32>*
%11 = bitcast <16 x i8>* %3 to i32*
br label %13

12:
ret void

; Loop body: reload, insert the i32 lane, run aese/aesmc, store back.
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <4 x i32>, <4 x i32>* %10, align 8
%16 = insertelement <4 x i32> %15, i32 %1, i64 0
%17 = bitcast <4 x i32> %16 to <16 x i8>
store i32 %1, i32* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
; The i64 element insert lowers to a full 64-bit d-register load (`vldr d0`)
; plus a d-register copy into the state operand; the checks still show a
; `vorr q0, q0, q0` on the vector argument at entry.
define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vorr d16, d0, d0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vorr d16, d0, d0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = load i64, i64* %0, align 8
%5 = bitcast <16 x i8>* %2 to <2 x i64>*
%6 = load <2 x i64>, <2 x i64>* %5, align 8
%7 = insertelement <2 x i64> %6, i64 %4, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <2 x i64>
%10 = insertelement <2 x i64> %9, i64 %4, i64 0
%11 = bitcast <2 x i64> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; The i64 arrives split across the GPR pair r0/r1, so each operand's element
; insert is two partial `vmov.32` lane writes — unsafe, hence the protecting
; `vorr q0, q0, q0` at entry.
define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_set64_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NEXT:    vmov.32 d0[1], r1
; CHECK-FIX-NEXT:    vmov.32 d16[1], r1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%4 = bitcast <16 x i8>* %2 to <2 x i64>*
%5 = load <2 x i64>, <2 x i64>* %4, align 8
%6 = insertelement <2 x i64> %5, i64 %0, i64 0
%7 = bitcast <2 x i64> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <2 x i64>
%9 = insertelement <2 x i64> %8, i64 %0, i64 0
%10 = bitcast <2 x i64> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Conditional i64 insert from memory. Note there is no `vorr q0` at entry
; here: the protecting `vorr q0, q0, q0` is placed after the predicated
; `vldrne d0` instead, immediately before the fused aese/aesmc pair.
define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB30_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    vldr d16, [r1]
; CHECK-FIX-NOSCHED-NEXT:    b .LBB30_3
; CHECK-FIX-NOSCHED-NEXT:  .LBB30_2:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB30_3:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB30_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vldr d18, [r1]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vorr d16, d18, d18
; CHECK-CORTEX-FIX-NEXT:    b .LBB30_3
; CHECK-CORTEX-FIX-NEXT:  .LBB30_2:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:  .LBB30_3:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vldrne d0, [r1]
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bx lr
br i1 %0, label %5, label %10

; First diamond: conditionally insert the loaded i64 into the state operand.
5:
%6 = load i64, i64* %1, align 8
%7 = bitcast <16 x i8>* %3 to <2 x i64>*
%8 = load <2 x i64>, <2 x i64>* %7, align 8
%9 = insertelement <2 x i64> %8, i64 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <2 x i64>*
%12 = load <2 x i64>, <2 x i64>* %11, align 8
br label %13

13:
%14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

; Second diamond: conditionally insert the loaded i64 into the key operand.
15:
%16 = load i64, i64* %1, align 8
%17 = bitcast <16 x i8> %2 to <2 x i64>
%18 = insertelement <2 x i64> %17, i64 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <2 x i64>
br label %21

21:
%22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <2 x i64> %14 to <16 x i8>
%24 = bitcast <2 x i64> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; The select-based conditional i64 insert: the value lives in the r2/r3 GPR
; pair, so each conditional insert is two partial `vmov.32` lane writes;
; q0 is protected with a `vorr` at entry.
define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldr r1, [sp]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    beq .LBB31_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
; CHECK-FIX-NEXT:  .LBB31_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB31_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
; CHECK-FIX-NEXT:  .LBB31_4: @ %select.end2
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%5 = bitcast <16 x i8>* %3 to <2 x i64>*
%6 = load <2 x i64>, <2 x i64>* %5, align 8
%7 = insertelement <2 x i64> %6, i64 %1, i64 0
%8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
%9 = bitcast <16 x i8> %2 to <2 x i64>
%10 = insertelement <2 x i64> %9, i64 %1, i64 0
%11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
%12 = bitcast <2 x i64> %8 to <16 x i8>
%13 = bitcast <2 x i64> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; The i64 is loaded into a GPR pair (`ldrd`) and moved into the key operand
; with `vmov d0, r4, r5`; the checks show q0 protected with a `vorr` at entry
; before the aese/aesmc loop. The two prefixes differ in the early-exit shape
; (branch-over vs. predicated `popeq`).
define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    ldrd r4, r5, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    strd r4, r5, [r2]
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB32_4
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vmov d0, r4, r5
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB32_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB32_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB32_4:
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    ldrd r4, r5, [r1]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    strd r4, r5, [r2]
; CHECK-CORTEX-FIX-NEXT:    popeq {r4, r5, r11, pc}
; CHECK-CORTEX-FIX-NEXT:  .LBB32_1:
; CHECK-CORTEX-FIX-NEXT:    vmov d0, r4, r5
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:  .LBB32_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    bne .LBB32_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r11, pc}
%5 = load i64, i64* %1, align 8
%6 = bitcast <16 x i8> %2 to <2 x i64>
%7 = insertelement <2 x i64> %6, i64 %5, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i64*
store i64 %5, i64* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

; Loop: chain the state through the fused aese/aesmc pair %0 times.
15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
; The by-value i64 sits in the r2/r3 GPR pair and is inserted into both AES
; operands with pairs of partial `vmov.32` lane writes inside the loop, so
; q0 is protected with a `vorr` at entry.
define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_set64_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB33_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
; CHECK-FIX-NEXT:    ldr r1, [sp]
; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
; CHECK-FIX-NEXT:  .LBB33_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bne .LBB33_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

; Preheader: build the loop-invariant key operand.
6:
%7 = bitcast <16 x i8> %2 to <2 x i64>
%8 = insertelement <2 x i64> %7, i64 %1, i64 0
%9 = bitcast <2 x i64> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <2 x i64>*
%11 = bitcast <16 x i8>* %3 to i64*
br label %13

12:
ret void

; Loop body: reload, insert the i64 element, run aese/aesmc, store back.
13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <2 x i64>, <2 x i64>* %10, align 8
%16 = insertelement <2 x i64> %15, i64 %1, i64 0
%17 = bitcast <2 x i64> %16 to <16 x i8>
store i64 %1, i64* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
; A half loaded from memory travels through a GPR (`ldrh` + `vmov.16`) into
; lane 0 of both AES operands; the 16-bit lane write is a partial register
; update, so the vector argument is protected with `vorr q0, q0, q0`.
define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = bitcast half* %0 to i16*
%5 = load i16, i16* %4, align 2
%6 = bitcast <16 x i8>* %2 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = insertelement <8 x i16> %7, i16 %5, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8> %1 to <8 x i16>
%11 = insertelement <8 x i16> %10, i16 %5, i64 0
%12 = bitcast <8 x i16> %11 to <16 x i8>
%13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
%14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
store <16 x i8> %14, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; The half argument occupies s0 (part of q0) under the hard-float ABI, so the
; vector argument lives in q1 here; its lane insert goes through a GPR
; (`vmov r1, s0` then `vmov.16`), and the fix protects q1 with a `vorr`.
define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf16_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vmov r1, s0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vmov.16 d2[0], r1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:    aese.8 q8, q1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
%4 = bitcast <16 x i8>* %2 to <8 x i16>*
%5 = load <8 x i16>, <8 x i16>* %4, align 8
%6 = bitcast half %0 to i16
%7 = insertelement <8 x i16> %5, i16 %6, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %6, i64 0
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
|  | define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind { | 
|  | ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr: | 
|  | ; CHECK-FIX-NOSCHED:       @ %bb.0: | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr} | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    .pad #24 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_3 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1: | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r3 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #8] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r5 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    bne .LBB36_4 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_2: | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r6, d1 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r3, d0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r4, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r6, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r3, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r4 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r3 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    b .LBB36_5 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_3: | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #14] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #12] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #8] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #6] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #10] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r10, [r2] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #4] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #2] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_2 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_4: | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov r5, r3, d1 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    mov r4, r7 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d0[1] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d0[0]}, [r1:16] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d0[0] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r5 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r3 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r7 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r7, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r3, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r5, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    mov r7, r4 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:  .LBB36_5: | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r0, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r6, r12, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r3, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r9, r5, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r1, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2] | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24 | 
|  | ; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} | 
|  | ; | 
|  | ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr: | 
|  | ; CHECK-CORTEX-FIX:       @ %bb.0: | 
|  | ; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} | 
|  | ; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr} | 
|  | ; CHECK-CORTEX-FIX-NEXT:    .pad #24 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:  @ %bb.1: | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vorr q9, q8, q8 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d18[0]}, [r1:16] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d18[0] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov r3, r6, d17 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r11, r6 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    bne .LBB36_4 | 
|  | ; CHECK-CORTEX-FIX-NEXT:  .LBB36_2: | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov r1, r7, d0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r12, r7, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    mov r0, r3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov r7, r3, d1 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r10, r7 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r5, r7, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth lr, r3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    mov r3, r0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    b .LBB36_5 | 
|  | ; CHECK-CORTEX-FIX-NEXT:  .LBB36_3: | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r2, #12] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #14] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #2] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #4] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #6] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #8] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #10] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_2 | 
|  | ; CHECK-CORTEX-FIX-NEXT:  .LBB36_4: | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vorr q8, q0, q0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 r5, d0[1] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r6, r5 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r12, r5, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov r5, r7, d1 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 r1, d16[0] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r10, r5 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r5, r5, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill | 
|  | ; CHECK-CORTEX-FIX-NEXT:  .LBB36_5: | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r4, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r10, r5, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r1, r3, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r9, lsl #16 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2] | 
|  | ; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24 | 
|  | ; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} | 
|  | br i1 %0, label %5, label %12 | 
|  |  | 
|  | 5: | 
|  | %6 = bitcast half* %1 to i16* | 
|  | %7 = load i16, i16* %6, align 2 | 
|  | %8 = bitcast <16 x i8>* %3 to <8 x i16>* | 
|  | %9 = load <8 x i16>, <8 x i16>* %8, align 8 | 
|  | %10 = insertelement <8 x i16> %9, i16 %7, i64 0 | 
|  | %11 = bitcast <8 x i16> %10 to <8 x half> | 
|  | br label %15 | 
|  |  | 
|  | 12: | 
|  | %13 = bitcast <16 x i8>* %3 to <8 x half>* | 
|  | %14 = load <8 x half>, <8 x half>* %13, align 8 | 
|  | br label %15 | 
|  |  | 
|  | 15: | 
|  | %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ] | 
|  | br i1 %0, label %17, label %23 | 
|  |  | 
|  | 17: | 
|  | %18 = bitcast half* %1 to i16* | 
|  | %19 = load i16, i16* %18, align 2 | 
|  | %20 = bitcast <16 x i8> %2 to <8 x i16> | 
|  | %21 = insertelement <8 x i16> %20, i16 %19, i64 0 | 
|  | %22 = bitcast <8 x i16> %21 to <8 x half> | 
|  | br label %25 | 
|  |  | 
|  | 23: | 
|  | %24 = bitcast <16 x i8> %2 to <8 x half> | 
|  | br label %25 | 
|  |  | 
|  | 25: | 
|  | %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ] | 
|  | %27 = bitcast <8 x half> %16 to <16 x i8> | 
|  | %28 = bitcast <8 x half> %26 to <16 x i8> | 
|  | %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28) | 
|  | %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29) | 
|  | store <16 x i8> %30, <16 x i8>* %3, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
; Test: a half value passed by *value* is conditionally inserted as lane 0 of
; both aese operands. The lane travels through a general-purpose register
; ("vmov r_, s0" in the assertions below), so the operands are rebuilt from
; GPR writes on at least one path and cannot be proven to come solely from
; 64/128-bit Neon register writes.
define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    .pad #24
; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    vmov r12, s0
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r12
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT:    uxth r2, r3
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r3, r5
; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_3
; CHECK-FIX-NOSCHED-NEXT:  .LBB37_2:
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #14]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #12]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #8]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #6]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #2]
; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1, #10]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #4]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1]
; CHECK-FIX-NOSCHED-NEXT:  .LBB37_3:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_5
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.4:
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r6, d2[1]
; CHECK-FIX-NOSCHED-NEXT:    mov r3, r2
; CHECK-FIX-NOSCHED-NEXT:    mov r2, r7
; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r7, d3
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d2[0], r12
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d2[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r4
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r7
; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    mov r7, r2
; CHECK-FIX-NOSCHED-NEXT:    mov r2, r3
; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r4, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_6
; CHECK-FIX-NOSCHED-NEXT:  .LBB37_5:
; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r6, d3
; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r5, d2
; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r3
; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r5
; CHECK-FIX-NOSCHED-NEXT:  .LBB37_6:
; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r3, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r5, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r4, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    .pad #28
; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #28
; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r2
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r7, d16[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    vmov r3, r7, d17
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r11, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    b .LBB37_3
; CHECK-CORTEX-FIX-NEXT:  .LBB37_2:
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r1, #12]
; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r1, #14]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #2]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #4]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #6]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #8]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #10]
; CHECK-CORTEX-FIX-NEXT:  .LBB37_3:
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_5
; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d2[1]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d2[0], r2
; CHECK-CORTEX-FIX-NEXT:    vmov r4, r6, d3
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r4
; CHECK-CORTEX-FIX-NEXT:    lsr r4, r4, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r6
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r6, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d2[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    b .LBB37_6
; CHECK-CORTEX-FIX-NEXT:  .LBB37_5:
; CHECK-CORTEX-FIX-NEXT:    vmov r2, r3, d2
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    mov r0, r7
; CHECK-CORTEX-FIX-NEXT:    vmov r6, r7, d3
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r6
; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT:    mov r7, r0
; CHECK-CORTEX-FIX-NEXT:  .LBB37_6:
; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r7, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r10, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r2, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r2, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r3
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r2
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r6
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r4
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r5
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #28
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; First operand: either the in-memory vector with the half lane inserted
; (block %5) or the vector loaded from %3 unchanged (block %11).
br i1 %0, label %5, label %11

5:
%6 = bitcast <16 x i8>* %3 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <8 x half>
br label %14

11:
%12 = bitcast <16 x i8>* %3 to <8 x half>*
%13 = load <8 x half>, <8 x half>* %12, align 8
br label %14

14:
%15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
; Second operand: %2 with the same half lane inserted, or %2 as-is.
br i1 %0, label %16, label %21

16:
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = bitcast half %1 to i16
%19 = insertelement <8 x i16> %17, i16 %18, i64 0
%20 = bitcast <8 x i16> %19 to <8 x half>
br label %23

21:
%22 = bitcast <16 x i8> %2 to <8 x half>
br label %23

23:
%24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
%25 = bitcast <8 x half> %15 to <16 x i8>
%26 = bitcast <8 x half> %24 to <16 x i8>
; The fused aese/aesmc pair whose inputs the erratum-fix pass must analyse.
%27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
%28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
store <16 x i8> %28, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; Test: a half loaded through a pointer is inserted as lane 0 of the key
; operand, which is then fed into an aese/aesmc loop. The assertions expect a
; single protecting vorr hoisted ahead of the loop (before the lane insert),
; not one per iteration.
define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrh r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strh r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB38_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB38_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB38_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; Build the loop-invariant aese operand %9 from %2 with the loaded lane, and
; publish the lane through the output pointer.
%5 = bitcast half* %1 to i16*
%6 = load i16, i16* %5, align 2
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to i16*
store i16 %6, i16* %10, align 8
; Skip the loop entirely when the trip count %0 is zero.
%11 = icmp eq i32 %0, 0
br i1 %11, label %15, label %12

12:
%13 = load <16 x i8>, <16 x i8>* %3, align 8
br label %16

14:
store <16 x i8> %20, <16 x i8>* %3, align 8
br label %15

15:
ret void

16:
; Loop body: repeatedly apply the fused aese/aesmc pair %0 times.
%17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
%18 = phi i32 [ 0, %12 ], [ %21, %16 ]
%19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
%21 = add nuw i32 %18, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %14, label %16
}
|  |  | 
; Test: a half passed by value is inserted into both the key operand and, on
; every iteration, into the freshly reloaded data operand. The assertions
; expect one protecting vorr on the invariant operand before the loop; the
; per-iteration vmov.16 lane insert still leaves the reloaded input needing
; analysis each time round.
define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB39_1:
; CHECK-FIX-NEXT:    vmov r2, s0
; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
; CHECK-FIX-NEXT:  .LBB39_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r2
; CHECK-FIX-NEXT:    aese.8 q8, q1
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bne .LBB39_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
; Early exit when the trip count %0 is zero.
%5 = icmp eq i32 %0, 0
br i1 %5, label %13, label %6

6:
; Loop-invariant key operand %10: %2 with the half lane inserted.
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = bitcast <16 x i8>* %3 to half*
br label %14

13:
ret void

14:
; Each iteration reloads the data vector, re-inserts the lane, stores the
; half, then runs the fused aese/aesmc pair and stores the result.
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
%16 = load <8 x i16>, <8 x i16>* %11, align 8
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
%18 = bitcast <8 x i16> %17 to <16 x i8>
store half %1, half* %12, align 8
%19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
%20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
store <16 x i8> %20, <16 x i8>* %3, align 8
%21 = add nuw i32 %15, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %13, label %14
}
|  |  | 
; Test: a float loaded through a pointer is inserted as lane 0 of both aese
; operands. The float arrives via vldr into an s-register (a 32-bit write, not
; a 64/128-bit Neon write), so the assertions expect protecting vorr
; instructions on both q0 and q1 before the aese.
define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vldr s0, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aese.8 q1, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q1
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
; Insert the loaded float into both the in-memory vector and the argument %1,
; then feed both into the fused aese/aesmc pair.
%4 = load float, float* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %4, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x float>
%10 = insertelement <4 x float> %9, float %4, i64 0
%11 = bitcast <4 x float> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Test: a float passed by value is inserted as lane 0 of both aese operands.
; The lane inserts are 32-bit s-register writes (vmov.f32), so the assertions
; expect protecting vorr instructions on both q0 and q1 before the aese.
define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT:    vmov.f32 s0, s4
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    aese.8 q0, q1
; CHECK-FIX-NEXT:    aesmc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
; Insert the float into both the in-memory vector and the argument %1, then
; feed both into the fused aese/aesmc pair.
%4 = bitcast <16 x i8>* %2 to <4 x float>*
%5 = load <4 x float>, <4 x float>* %4, align 8
%6 = insertelement <4 x float> %5, float %0, i64 0
%7 = bitcast <4 x float> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x float>
%9 = insertelement <4 x float> %8, float %0, i64 0
%10 = bitcast <4 x float> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Test: a float loaded through a pointer is conditionally inserted as lane 0
; of both aese operands. The conditional inserts use vld1.32 single-lane loads
; (not full 64/128-bit writes), so the assertions expect a protecting vorr on
; q0 at entry before any path reaches the aese.
define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB42_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB42_3
; CHECK-FIX-NEXT:    b .LBB42_4
; CHECK-FIX-NEXT:  .LBB42_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB42_4
; CHECK-FIX-NEXT:  .LBB42_3:
; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT:  .LBB42_4:
; CHECK-FIX-NEXT:    aese.8 q8, q0
; CHECK-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; First operand: the in-memory vector with the float lane inserted (%5) or
; loaded unchanged (%10).
br i1 %0, label %5, label %10

5:
%6 = load float, float* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x float>*
%8 = load <4 x float>, <4 x float>* %7, align 8
%9 = insertelement <4 x float> %8, float %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <4 x float>*
%12 = load <4 x float>, <4 x float>* %11, align 8
br label %13

13:
%14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
; Second operand: %2 with the float lane inserted, or %2 as-is.
br i1 %0, label %15, label %19

15:
%16 = load float, float* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x float>
%18 = insertelement <4 x float> %17, float %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <4 x float>
br label %21

21:
%22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x float> %14 to <16 x i8>
%24 = bitcast <4 x float> %22 to <16 x i8>
; The fused aese/aesmc pair whose inputs the erratum-fix pass must analyse.
%25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; Test: a float passed by value is conditionally (via select) inserted as
; lane 0 of both aese operands. The selects lower to predicated vmovne.f32
; s-register writes, so the assertions expect protecting vorr instructions on
; both q2 and q1 before the aese. The two prefixes differ only in scheduling.
define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; Build both operands with selects instead of branches: inserted-lane vector
; when %0 is true, the unmodified vector otherwise.
%5 = bitcast <16 x i8>* %3 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %1, i64 0
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
%9 = bitcast <16 x i8> %2 to <4 x float>
%10 = insertelement <4 x float> %9, float %1, i64 0
%11 = select i1 %0, <4 x float> %10, <4 x float> %9
%12 = bitcast <4 x float> %8 to <16 x i8>
%13 = bitcast <4 x float> %11 to <16 x i8>
; The fused aese/aesmc pair whose inputs the erratum-fix pass must analyse.
%14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; The loop-invariant AES key %8 has lane 0 inserted from a float loaded
; through a pointer (a 32-bit vmov.f32 write, which is unsafe), so the CHECK
; lines expect a vorr on q0 inside the loop before each aese. The accumulator
; q8 comes from vld1.64 and needs no shield.
define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vldr s4, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vstr s4, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB44_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB44_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB44_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vldr s4, [r1]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vstr s4, [r2]
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB44_1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT:  .LBB44_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    bne .LBB44_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%5 = load float, float* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x float>
%7 = insertelement <4 x float> %6, float %5, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to float*
store float %5, float* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
; Both AES operands are rebuilt every iteration with an f32 lane insert
; (vmov.f32, a 32-bit scalar write), so the CHECK lines expect vorr shields
; on both q1 and q2 inside the loop body before each aese.
define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB45_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:  .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB45_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB45_1:
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:  .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB45_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <4 x float>
%8 = insertelement <4 x float> %7, float %1, i64 0
%9 = bitcast <4 x float> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <4 x float>*
%11 = bitcast <16 x i8>* %3 to float*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <4 x float>, <4 x float>* %10, align 8
%16 = insertelement <4 x float> %15, float %1, i64 0
%17 = bitcast <4 x float> %16 to <16 x i8>
store float %1, float* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
; Negative test: one aesd input is an all-zero vector materialized by
; vmov.i32 (a full 128-bit Neon write) and the other comes from vld1.64,
; so both producers are safe and no vorr shield is expected.
define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aesd_zero:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vmov.i32 q9, #0x0
; CHECK-FIX-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
%2 = load <16 x i8>, <16 x i8>* %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
store <16 x i8> %4, <16 x i8>* %0, align 8
ret void
}
|  |  | 
; The first aesd operand is returned in q0 by an opaque call, so the
; compiler cannot prove how q0 was last written; the CHECK lines expect a
; vorr shield on q0 after the call. The other operand (vld1.64) is safe.
define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind {
; CHECK-FIX-LABEL: aesd_via_call1:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_input
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aesd.8 q0, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
%2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
%3 = load <16 x i8>, <16 x i8>* %0, align 8
%4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
%5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
store <16 x i8> %5, <16 x i8>* %0, align 8
ret void
}
|  |  | 
; Same as aesd_via_call1 but the callee takes a half argument; q0 returned
; by the call still has unknown provenance, so a vorr shield is expected.
define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call2:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_inputf16
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aesd.8 q0, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
%4 = load <16 x i8>, <16 x i8>* %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; Same as aesd_via_call1 but the callee takes a float argument; q0 returned
; by the call still has unknown provenance, so a vorr shield is expected.
define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call3:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    .save {r4, lr}
; CHECK-FIX-NEXT:    push {r4, lr}
; CHECK-FIX-NEXT:    mov r4, r0
; CHECK-FIX-NEXT:    bl get_inputf32
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    aesd.8 q0, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT:    pop {r4, pc}
%3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
%4 = load <16 x i8>, <16 x i8>* %1, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; Negative test: both aesd inputs are produced by vld1.64 (128-bit Neon
; loads), which the pass treats as safe, so no vorr shield is expected.
define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%3 = load <16 x i8>, <16 x i8>* %1, align 8
%4 = load <16 x i8>, <16 x i8>* %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; Both aesd inputs arrive in argument registers q0/q1 from the caller, so
; their producing writes are unknown; the CHECK lines expect vorr shields
; on both registers before the fused aesd/aesimc pair.
define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    aesimc.8 q0, q1
; CHECK-FIX-NEXT:    bx lr
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
ret <16 x i8> %4
}
|  |  | 
; Negative test with two chained aesd/aesimc pairs: every input is either a
; vld1.64 result or the previous aesimc result, all of which are safe
; 128-bit Neon writes, so no vorr shield is expected for either pair.
define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT:    aesd.8 q8, q9
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%3 = load <16 x i8>, <16 x i8>* %1, align 8
%4 = load <16 x i8>, <16 x i8>* %0, align 8
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
store <16 x i8> %6, <16 x i8>* %1, align 8
%7 = load <16 x i8>, <16 x i8>* %0, align 8
%8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
store <16 x i8> %9, <16 x i8>* %1, align 8
ret void
}
|  |  | 
; Two chained aesd/aesimc pairs where both original inputs are caller
; arguments (unknown provenance). The CHECK lines show q0 receiving a vorr
; shield twice — presumably once per AES pair it feeds — while the second
; pair's first operand (the prior aesimc result) needs no shield.
define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q0, q8
; CHECK-FIX-NEXT:    bx lr
%3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
%4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
%5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
%6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
ret <16 x i8> %6
}
|  |  | 
; Negative test: both aesd inputs are reloaded via vld1.64 on every loop
; iteration, so each AES pair's inputs are provably safe and no vorr shield
; is expected anywhere in the loop.
define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB54_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB54_1
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB54_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB54_1
; CHECK-CORTEX-FIX-NEXT:  @ %bb.2:
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = icmp eq i32 %0, 0
br i1 %4, label %5, label %6

5:
ret void

6:
%7 = phi i32 [ %12, %6 ], [ 0, %3 ]
%8 = load <16 x i8>, <16 x i8>* %2, align 8
%9 = load <16 x i8>, <16 x i8>* %1, align 8
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
store <16 x i8> %11, <16 x i8>* %2, align 8
%12 = add nuw i32 %7, 1
%13 = icmp eq i32 %12, %0
br i1 %13, label %5, label %6
}
|  |  | 
; Both loop operands originate from caller arguments (unknown provenance),
; so vorr shields are emitted once at entry; inside the loop the first
; operand is the previous aesimc result, which is safe, so no shield is
; needed per iteration.
define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aesd_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB55_2
; CHECK-FIX-NEXT:  .LBB55_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesimc.8 q1, q1
; CHECK-FIX-NEXT:    bne .LBB55_1
; CHECK-FIX-NEXT:  .LBB55_2:
; CHECK-FIX-NEXT:    vorr q0, q1, q1
; CHECK-FIX-NEXT:    bx lr
%4 = icmp eq i32 %0, 0
br i1 %4, label %5, label %7

5:
%6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
ret <16 x i8> %6

7:
%8 = phi i32 [ %12, %7 ], [ 0, %3 ]
%9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
%12 = add nuw i32 %8, 1
%13 = icmp eq i32 %12, %0
br i1 %13, label %5, label %7
}
|  |  | 
; Lane 0 of both vectors is overwritten with an i8 loaded through a pointer
; (GPR lane insert via vmov.8). The CHECK lines expect a vorr shield on the
; incoming argument q0; the other vector is built from a vld1.64 load plus
; the same lane insert.
define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = load i8, i8* %0, align 1
%5 = load <16 x i8>, <16 x i8>* %2, align 8
%6 = insertelement <16 x i8> %5, i8 %4, i64 0
%7 = insertelement <16 x i8> %1, i8 %4, i64 0
%8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
%9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
store <16 x i8> %9, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Same as aesd_set8_via_ptr but the inserted i8 arrives by value in a GPR;
; a vorr shield is expected on the incoming argument q0 before the
; vmov.8 lane writes and the fused aesd/aesimc pair.
define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set8_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.8 d0[0], r0
; CHECK-FIX-NEXT:    vmov.8 d16[0], r0
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%4 = load <16 x i8>, <16 x i8>* %2, align 8
%5 = insertelement <16 x i8> %4, i8 %0, i64 0
%6 = insertelement <16 x i8> %1, i8 %0, i64 0
%7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
%8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
store <16 x i8> %8, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Diamond CFG: lane 0 of each vector is conditionally replaced with an i8
; loaded through a pointer (vld1.8 single-lane load). The incoming argument
; q0 gets a vorr shield at entry; the memory-sourced vector is built from
; vld1.64 on both paths.
define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB58_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB58_3
; CHECK-FIX-NEXT:    b .LBB58_4
; CHECK-FIX-NEXT:  .LBB58_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB58_4
; CHECK-FIX-NEXT:  .LBB58_3:
; CHECK-FIX-NEXT:    vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT:  .LBB58_4:
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
br i1 %0, label %5, label %9

5:
%6 = load i8, i8* %1, align 1
%7 = load <16 x i8>, <16 x i8>* %3, align 8
%8 = insertelement <16 x i8> %7, i8 %6, i64 0
br label %11

9:
%10 = load <16 x i8>, <16 x i8>* %3, align 8
br label %11

11:
%12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
br i1 %0, label %13, label %16

13:
%14 = load i8, i8* %1, align 1
%15 = insertelement <16 x i8> %2, i8 %14, i64 0
br label %16

16:
%17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; Conditional (select-based) i8 lane insert on both vectors from a GPR
; value; the incoming argument q0 is shielded with a vorr at entry, before
; the conditional vmov.8 lane writes and the fused aesd/aesimc pair.
define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB59_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
; CHECK-FIX-NEXT:  .LBB59_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB59_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:  .LBB59_4: @ %select.end2
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = load <16 x i8>, <16 x i8>* %3, align 8
%6 = insertelement <16 x i8> %5, i8 %1, i64 0
%7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
%8 = insertelement <16 x i8> %2, i8 %1, i64 0
%9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
%10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
%11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
store <16 x i8> %11, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; The loop-invariant key vector has lane 0 inserted from a byte loaded
; through a pointer; the incoming argument q0 is shielded with a vorr at
; entry and the lane insert is hoisted out of the loop. The accumulator q8
; (vld1.64 / aesimc results) needs no shield inside the loop.
define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrb r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strb r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB60_1:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB60_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB60_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = load i8, i8* %1, align 1
%6 = insertelement <16 x i8> %2, i8 %5, i64 0
%7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
store i8 %5, i8* %7, align 8
%8 = icmp eq i32 %0, 0
br i1 %8, label %12, label %9

9:
%10 = load <16 x i8>, <16 x i8>* %3, align 8
br label %13

11:
store <16 x i8> %17, <16 x i8>* %3, align 8
br label %12

12:
ret void

13:
%14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
%15 = phi i32 [ 0, %9 ], [ %18, %13 ]
%16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
%17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
%18 = add nuw i32 %15, 1
%19 = icmp eq i32 %18, %0
br i1 %19, label %11, label %13
}
|  |  | 
; The key vector's byte lane insert is hoisted out of the loop (vmov.8
; d0[0] after the entry vorr shield on q0), while the accumulator's lane
; insert (vmov.8 d16[0]) happens every iteration; q8 itself starts from a
; safe vld1.64 load, so no shield is emitted inside the loop.
define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB61_1:
; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB61_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB61_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %10, label %6

6:
%7 = insertelement <16 x i8> %2, i8 %1, i64 0
%8 = load <16 x i8>, <16 x i8>* %3, align 8
br label %11

9:
store <16 x i8> %16, <16 x i8>* %3, align 8
br label %10

10:
ret void

11:
%12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
%13 = phi i32 [ 0, %6 ], [ %17, %11 ]
%14 = insertelement <16 x i8> %12, i8 %1, i64 0
%15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7)
%16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15)
%17 = add nuw i32 %13, 1
%18 = icmp eq i32 %17, %0
br i1 %18, label %9, label %11
}
|  |  | 
; 16-bit variant of aesd_set8_via_ptr: lane 0 of both vectors is replaced
; with an i16 loaded through a pointer (vmov.16 GPR lane insert); the
; incoming argument q0 gets a vorr shield before the fused aesd/aesimc pair.
define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
%4 = load i16, i16* %0, align 2
%5 = bitcast <16 x i8>* %2 to <8 x i16>*
%6 = load <8 x i16>, <8 x i16>* %5, align 8
%7 = insertelement <8 x i16> %6, i16 %4, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %4, i64 0
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; 16-bit variant of aesd_set8_via_val: the inserted i16 arrives by value in
; a GPR; the incoming argument q0 is shielded with a vorr before the
; vmov.16 lane writes and the fused aesd/aesimc pair.
define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set16_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
%4 = bitcast <16 x i8>* %2 to <8 x i16>*
%5 = load <8 x i16>, <8 x i16>* %4, align 8
%6 = insertelement <8 x i16> %5, i16 %0, i64 0
%7 = bitcast <8 x i16> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <8 x i16>
%9 = insertelement <8 x i16> %8, i16 %0, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; 16-bit variant of aesd_set8_cond_via_ptr: diamond CFG with a conditional
; single-lane vld1.16 into each vector; the incoming argument q0 gets a
; vorr shield at entry, and the memory-sourced vector is built from vld1.64
; on both paths.
define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB64_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB64_3
; CHECK-FIX-NEXT:    b .LBB64_4
; CHECK-FIX-NEXT:  .LBB64_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB64_4
; CHECK-FIX-NEXT:  .LBB64_3:
; CHECK-FIX-NEXT:    vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT:  .LBB64_4:
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
br i1 %0, label %5, label %10

5:
%6 = load i16, i16* %1, align 2
%7 = bitcast <16 x i8>* %3 to <8 x i16>*
%8 = load <8 x i16>, <8 x i16>* %7, align 8
%9 = insertelement <8 x i16> %8, i16 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = load <8 x i16>, <8 x i16>* %11, align 8
br label %13

13:
%14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i16, i16* %1, align 2
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = insertelement <8 x i16> %17, i16 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <8 x i16>
br label %21

21:
%22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <8 x i16> %14 to <16 x i8>
%24 = bitcast <8 x i16> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB65_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:  .LBB65_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB65_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:  .LBB65_4: @ %select.end2
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; Lane 0 of both aesd operands is conditionally replaced (via select) with an
; i16 held in a GPR. The lowered `vmov.16 dN[0], r1` lane writes are not full
; 64/128-bit Neon writes, so a protecting vorr is expected on the aesd input.
%5 = bitcast <16 x i8>* %3 to <8 x i16>*
%6 = load <8 x i16>, <8 x i16>* %5, align 8
%7 = insertelement <8 x i16> %6, i16 %1, i64 0
%8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
%9 = bitcast <16 x i8> %2 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %1, i64 0
%11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
%12 = bitcast <8 x i16> %8 to <16 x i8>
%13 = bitcast <8 x i16> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrh r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strh r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB66_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB66_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB66_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; The key operand's lane 0 is set once via a GPR `vmov.16` before the loop;
; the protecting vorr is hoisted to the function entry rather than repeated
; inside the aesd/aesimc loop.
%5 = load i16, i16* %1, align 2
%6 = bitcast <16 x i8> %2 to <8 x i16>
%7 = insertelement <8 x i16> %6, i16 %5, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i16*
store i16 %5, i16* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB67_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:  .LBB67_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bne .LBB67_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
; Both aesd operands get a GPR lane write (`vmov.16`) on every iteration; the
; protecting vorr on the q0 argument is still emitted once at function entry.
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %1, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <8 x i16>*
%11 = bitcast <16 x i8>* %3 to i16*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <8 x i16>, <8 x i16>* %10, align 8
%16 = insertelement <8 x i16> %15, i16 %1, i64 0
%17 = bitcast <8 x i16> %16 to <16 x i8>
store i16 %1, i16* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; An i32 loaded through a pointer is inserted into lane 0 of both aesd
; operands via GPR `vmov.32` writes (unsafe), so a protecting vorr is emitted.
; The two prefixes only differ in instruction scheduling order.
%4 = load i32, i32* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x i32>*
%6 = load <4 x i32>, <4 x i32>* %5, align 8
%7 = insertelement <4 x i32> %6, i32 %4, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <4 x i32>
%10 = insertelement <4 x i32> %9, i32 %4, i64 0
%11 = bitcast <4 x i32> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set32_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
; Same as aesd_set32_via_ptr but the i32 arrives as an argument in r0;
; lane 0 of both operands is written from the GPR, requiring the vorr.
%4 = bitcast <16 x i8>* %2 to <4 x i32>*
%5 = load <4 x i32>, <4 x i32>* %4, align 8
%6 = insertelement <4 x i32> %5, i32 %0, i64 0
%7 = bitcast <4 x i32> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x i32>
%9 = insertelement <4 x i32> %8, i32 %0, i64 0
%10 = bitcast <4 x i32> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB70_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB70_3
; CHECK-FIX-NEXT:    b .LBB70_4
; CHECK-FIX-NEXT:  .LBB70_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB70_4
; CHECK-FIX-NEXT:  .LBB70_3:
; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT:  .LBB70_4:
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; Lane 0 of each aesd operand is conditionally replaced by a `vld1.32` single
; lane load (a partial Neon write); the vorr protection on the argument
; register is emitted once at entry, before the diamond control flow.
br i1 %0, label %5, label %10

5:
%6 = load i32, i32* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x i32>*
%8 = load <4 x i32>, <4 x i32>* %7, align 8
%9 = insertelement <4 x i32> %8, i32 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <4 x i32>*
%12 = load <4 x i32>, <4 x i32>* %11, align 8
br label %13

13:
%14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i32, i32* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x i32>
%18 = insertelement <4 x i32> %17, i32 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <4 x i32>
br label %21

21:
%22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x i32> %14 to <16 x i8>
%24 = bitcast <4 x i32> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB71_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
; CHECK-FIX-NEXT:  .LBB71_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB71_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:  .LBB71_4: @ %select.end2
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; Conditional (select-based) GPR lane writes into lane 0 of both aesd
; operands; the unsafe `vmov.32` writes mean the vorr protection is required.
%5 = bitcast <16 x i8>* %3 to <4 x i32>*
%6 = load <4 x i32>, <4 x i32>* %5, align 8
%7 = insertelement <4 x i32> %6, i32 %1, i64 0
%8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
%9 = bitcast <16 x i8> %2 to <4 x i32>
%10 = insertelement <4 x i32> %9, i32 %1, i64 0
%11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
%12 = bitcast <4 x i32> %8 to <16 x i8>
%13 = bitcast <4 x i32> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldr r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    str r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB72_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB72_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB72_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; The loop-invariant key gets its lane 0 set from a GPR before the loop; the
; protecting vorr is emitted once at entry, keeping the aesd loop body clean.
%5 = load i32, i32* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x i32>
%7 = insertelement <4 x i32> %6, i32 %5, i64 0
%8 = bitcast <4 x i32> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i32*
store i32 %5, i32* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB73_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
; CHECK-FIX-NEXT:  .LBB73_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bne .LBB73_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
; Per-iteration GPR lane writes into the data operand plus a hoisted lane
; write into the key; the vorr protecting q0 is emitted once at entry.
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <4 x i32>
%8 = insertelement <4 x i32> %7, i32 %1, i64 0
%9 = bitcast <4 x i32> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <4 x i32>*
%11 = bitcast <16 x i8>* %3 to i32*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <4 x i32>, <4 x i32>* %10, align 8
%16 = insertelement <4 x i32> %15, i32 %1, i64 0
%17 = bitcast <4 x i32> %16 to <16 x i8>
store i32 %1, i32* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vldr d0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vorr d16, d0, d0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vldr d0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vorr d16, d0, d0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; The i64 insert into element 0 lowers to a full 64-bit `vldr d0` plus a
; d-register copy (`vorr d16, d0, d0`); a protecting `vorr q0` still appears
; at entry for the incoming argument register.
%4 = load i64, i64* %0, align 8
%5 = bitcast <16 x i8>* %2 to <2 x i64>*
%6 = load <2 x i64>, <2 x i64>* %5, align 8
%7 = insertelement <2 x i64> %6, i64 %4, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <2 x i64>
%10 = insertelement <2 x i64> %9, i64 %4, i64 0
%11 = bitcast <2 x i64> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_set64_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NEXT:    vmov.32 d0[1], r1
; CHECK-FIX-NEXT:    vmov.32 d16[1], r1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; The i64 argument arrives split in r0/r1, so the element-0 insert becomes
; a pair of 32-bit GPR lane writes per operand — unsafe, hence the vorr.
%4 = bitcast <16 x i8>* %2 to <2 x i64>*
%5 = load <2 x i64>, <2 x i64>* %4, align 8
%6 = insertelement <2 x i64> %5, i64 %0, i64 0
%7 = bitcast <2 x i64> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <2 x i64>
%9 = insertelement <2 x i64> %8, i64 %0, i64 0
%10 = bitcast <2 x i64> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB76_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    vldr d16, [r1]
; CHECK-FIX-NOSCHED-NEXT:    b .LBB76_3
; CHECK-FIX-NOSCHED-NEXT:  .LBB76_2:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB76_3:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vldrne d0, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB76_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vldr d18, [r1]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vorr d16, d18, d18
; CHECK-CORTEX-FIX-NEXT:    b .LBB76_3
; CHECK-CORTEX-FIX-NEXT:  .LBB76_2:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:  .LBB76_3:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vldrne d0, [r1]
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; The conditional `vldrne d0` only writes the low half of q0, so a protecting
; `vorr q0, q0, q0` is still placed between it and the aesd.
br i1 %0, label %5, label %10

5:
%6 = load i64, i64* %1, align 8
%7 = bitcast <16 x i8>* %3 to <2 x i64>*
%8 = load <2 x i64>, <2 x i64>* %7, align 8
%9 = insertelement <2 x i64> %8, i64 %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <2 x i64>*
%12 = load <2 x i64>, <2 x i64>* %11, align 8
br label %13

13:
%14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load i64, i64* %1, align 8
%17 = bitcast <16 x i8> %2 to <2 x i64>
%18 = insertelement <2 x i64> %17, i64 %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <2 x i64>
br label %21

21:
%22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <2 x i64> %14 to <16 x i8>
%24 = bitcast <2 x i64> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldr r1, [sp]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    beq .LBB77_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
; CHECK-FIX-NEXT:  .LBB77_2: @ %select.end
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB77_4
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
; CHECK-FIX-NEXT:  .LBB77_4: @ %select.end2
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; The i64 value arrives split in r2/r3 and is conditionally inserted through
; paired 32-bit GPR lane writes (unsafe), so the q0 argument gets a vorr.
%5 = bitcast <16 x i8>* %3 to <2 x i64>*
%6 = load <2 x i64>, <2 x i64>* %5, align 8
%7 = insertelement <2 x i64> %6, i64 %1, i64 0
%8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
%9 = bitcast <16 x i8> %2 to <2 x i64>
%10 = insertelement <2 x i64> %9, i64 %1, i64 0
%11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
%12 = bitcast <2 x i64> %8 to <16 x i8>
%13 = bitcast <2 x i64> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    ldrd r4, r5, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    strd r4, r5, [r2]
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB78_4
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vmov d0, r4, r5
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB78_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB78_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB78_4:
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    ldrd r4, r5, [r1]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    strd r4, r5, [r2]
; CHECK-CORTEX-FIX-NEXT:    popeq {r4, r5, r11, pc}
; CHECK-CORTEX-FIX-NEXT:  .LBB78_1:
; CHECK-CORTEX-FIX-NEXT:    vmov d0, r4, r5
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:  .LBB78_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    bne .LBB78_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r11, pc}
; The key's lane write (`vmov d0, r4, r5`, a GPR-pair transfer) is hoisted out
; of the loop and the protecting vorr is emitted once at function entry; the
; two prefixes differ only in how the early-exit epilogue is scheduled.
%5 = load i64, i64* %1, align 8
%6 = bitcast <16 x i8> %2 to <2 x i64>
%7 = insertelement <2 x i64> %6, i64 %5, i64 0
%8 = bitcast <2 x i64> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to i64*
store i64 %5, i64* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB79_1:
; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
; CHECK-FIX-NEXT:    ldr r1, [sp]
; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
; CHECK-FIX-NEXT:  .LBB79_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bne .LBB79_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
; The split i64 (r2/r3) is written into both operands by 32-bit GPR lane
; writes — into the key once before the loop, into the data every iteration —
; so the vorr on q0 is required and is hoisted to entry.
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <2 x i64>
%8 = insertelement <2 x i64> %7, i64 %1, i64 0
%9 = bitcast <2 x i64> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <2 x i64>*
%11 = bitcast <16 x i8>* %3 to i64*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <2 x i64>, <2 x i64>* %10, align 8
%16 = insertelement <2 x i64> %15, i64 %1, i64 0
%17 = bitcast <2 x i64> %16 to <16 x i8>
store i64 %1, i64* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; A half loaded via pointer (as raw i16 bits) is inserted into lane 0 of both
; aesd operands through GPR `vmov.16` writes (unsafe), hence the vorr.
%4 = bitcast half* %0 to i16*
%5 = load i16, i16* %4, align 2
%6 = bitcast <16 x i8>* %2 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = insertelement <8 x i16> %7, i16 %5, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
%10 = bitcast <16 x i8> %1 to <8 x i16>
%11 = insertelement <8 x i16> %10, i16 %5, i64 0
%12 = bitcast <8 x i16> %11 to <16 x i8>
%13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
%14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
store <16 x i8> %14, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vmov r1, s0
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    vmov.16 d2[0], r1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
; CHECK-FIX-NEXT:    aesd.8 q8, q1
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
; With the half argument occupying s0, the vector argument lands in q1; its
; lane 0 is written from a GPR (`vmov.16 d2[0]`), so the vorr protects q1.
%4 = bitcast <16 x i8>* %2 to <8 x i16>*
%5 = load <8 x i16>, <8 x i16>* %4, align 8
%6 = bitcast half %0 to i16
%7 = insertelement <8 x i16> %5, i16 %6, i64 0
%8 = bitcast <8 x i16> %7 to <16 x i8>
%9 = bitcast <16 x i8> %1 to <8 x i16>
%10 = insertelement <8 x i16> %9, i16 %6, i64 0
%11 = bitcast <8 x i16> %10 to <16 x i8>
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
; Conditional f16 insert via pointer: each AES operand is a phi of either the
; original vector or a copy with an i16 (loaded through a half*) inserted at
; lane 0. Because only some incoming paths perform the unsafe lane write, the
; compiler cannot prove the aesd inputs were set by full Neon writes; the
; CORTEX CHECK lines show `vorr` copies guarding the values before the lane
; inserts.
define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    .pad #24
; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB82_3
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r3
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r5
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB82_4
; CHECK-FIX-NOSCHED-NEXT:  .LBB82_2:
; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r6, d1
; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r3, d0
; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r4, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r4
; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r3
; CHECK-FIX-NOSCHED-NEXT:    b .LBB82_5
; CHECK-FIX-NOSCHED-NEXT:  .LBB82_3:
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #14]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #12]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #8]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #6]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r2, #10]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r10, [r2]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #4]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r2, #2]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB82_2
; CHECK-FIX-NOSCHED-NEXT:  .LBB82_4:
; CHECK-FIX-NOSCHED-NEXT:    vmov r5, r3, d1
; CHECK-FIX-NOSCHED-NEXT:    mov r4, r7
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r7, d0[1]
; CHECK-FIX-NOSCHED-NEXT:    vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d0[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r9, r5
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r3
; CHECK-FIX-NOSCHED-NEXT:    uxth r6, r7
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r1, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r5, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    mov r7, r4
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:  .LBB82_5:
; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r6, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r3, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r9, r5, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r1, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    .pad #24
; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_3
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vorr q9, q8, q8
; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d18[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d18[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    vmov r3, r6, d17
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r11, r6
; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bne .LBB82_4
; CHECK-CORTEX-FIX-NEXT:  .LBB82_2:
; CHECK-CORTEX-FIX-NEXT:    vmov r1, r7, d0
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r7, #16
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    mov r0, r3
; CHECK-CORTEX-FIX-NEXT:    vmov r7, r3, d1
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r5, r7, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r3, #16
; CHECK-CORTEX-FIX-NEXT:    mov r3, r0
; CHECK-CORTEX-FIX-NEXT:    b .LBB82_5
; CHECK-CORTEX-FIX-NEXT:  .LBB82_3:
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2]
; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r2, #12]
; CHECK-CORTEX-FIX-NEXT:    ldrh r4, [r2, #14]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #2]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #4]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #6]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #8]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r2, #10]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_2
; CHECK-CORTEX-FIX-NEXT:  .LBB82_4:
; CHECK-CORTEX-FIX-NEXT:    vorr q8, q0, q0
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r5, d0[1]
; CHECK-CORTEX-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r5
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r5, #16
; CHECK-CORTEX-FIX-NEXT:    vmov r5, r7, d1
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r1, d16[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r5
; CHECK-CORTEX-FIX-NEXT:    lsr r5, r5, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r1
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r1, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:  .LBB82_5:
; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r10, r5, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r1, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
br i1 %0, label %5, label %12

; %0 true: load the vector from %3 and insert the i16 loaded through %1 at lane 0.
5:
%6 = bitcast half* %1 to i16*
%7 = load i16, i16* %6, align 2
%8 = bitcast <16 x i8>* %3 to <8 x i16>*
%9 = load <8 x i16>, <8 x i16>* %8, align 8
%10 = insertelement <8 x i16> %9, i16 %7, i64 0
%11 = bitcast <8 x i16> %10 to <8 x half>
br label %15

; %0 false: take the vector from memory unmodified.
12:
%13 = bitcast <16 x i8>* %3 to <8 x half>*
%14 = load <8 x half>, <8 x half>* %13, align 8
br label %15

; First AES operand: phi of the modified/unmodified loaded vector.
15:
%16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
br i1 %0, label %17, label %23

; %0 true: insert the same loaded i16 into lane 0 of argument %2.
17:
%18 = bitcast half* %1 to i16*
%19 = load i16, i16* %18, align 2
%20 = bitcast <16 x i8> %2 to <8 x i16>
%21 = insertelement <8 x i16> %20, i16 %19, i64 0
%22 = bitcast <8 x i16> %21 to <8 x half>
br label %25

; %0 false: use argument %2 unmodified.
23:
%24 = bitcast <16 x i8> %2 to <8 x half>
br label %25

; Second AES operand phi, then the fused aesd/aesimc pair under test.
25:
%26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
%27 = bitcast <8 x half> %16 to <16 x i8>
%28 = bitcast <8 x half> %26 to <16 x i8>
%29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
%30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
store <16 x i8> %30, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; Conditional f16 insert via a by-value half: same shape as
; aesd_setf16_cond_via_ptr, but the inserted i16 comes from bitcasting the
; half argument %1 instead of a memory load. Each AES operand is a phi of the
; original vector and a lane-0-modified copy, so the fix must treat the aesd
; inputs as potentially produced by partial lane writes.
define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT:    .pad #24
; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    vmov r12, s0
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r3, d16[1]
; CHECK-FIX-NOSCHED-NEXT:    vmov r7, r6, d17
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r12
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r5, d16[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r7
; CHECK-FIX-NOSCHED-NEXT:    uxth r2, r3
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r4, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr r6, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    lsr r7, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r3, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    str r4, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    uxth r3, r5
; CHECK-FIX-NOSCHED-NEXT:    str r6, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_3
; CHECK-FIX-NOSCHED-NEXT:  .LBB83_2:
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #14]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #12]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #8]
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #6]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1, #2]
; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1, #10]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r2, [r1, #4]
; CHECK-FIX-NOSCHED-NEXT:    ldrh r3, [r1]
; CHECK-FIX-NOSCHED-NEXT:  .LBB83_3:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_5
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.4:
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r6, d2[1]
; CHECK-FIX-NOSCHED-NEXT:    mov r3, r2
; CHECK-FIX-NOSCHED-NEXT:    mov r2, r7
; CHECK-FIX-NOSCHED-NEXT:    vmov r4, r7, d3
; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d2[0], r12
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 r0, d2[0]
; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r6
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r4
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r7
; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r7, #16
; CHECK-FIX-NOSCHED-NEXT:    mov r7, r2
; CHECK-FIX-NOSCHED-NEXT:    mov r2, r3
; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r4, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_6
; CHECK-FIX-NOSCHED-NEXT:  .LBB83_5:
; CHECK-FIX-NOSCHED-NEXT:    vmov r3, r6, d3
; CHECK-FIX-NOSCHED-NEXT:    vmov r0, r5, d2
; CHECK-FIX-NOSCHED-NEXT:    lsr r4, r3, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r9, r6, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr r12, r5, #16
; CHECK-FIX-NOSCHED-NEXT:    lsr lr, r0, #16
; CHECK-FIX-NOSCHED-NEXT:    uxth r11, r6
; CHECK-FIX-NOSCHED-NEXT:    uxth r10, r3
; CHECK-FIX-NOSCHED-NEXT:    uxth r5, r5
; CHECK-FIX-NOSCHED-NEXT:  .LBB83_6:
; CHECK-FIX-NOSCHED-NEXT:    uxth r8, r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    ldr r3, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r3, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r8, lr, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r5, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r4, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r2, lsl #16
; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #24
; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT:    .pad #28
; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #28
; CHECK-CORTEX-FIX-NEXT:    vmov r2, s0
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r2
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r7, d16[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    uxth r7, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    vmov r3, r7, d17
; CHECK-CORTEX-FIX-NEXT:    uxth r6, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r3, r3, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r11, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r7, r7, #16
; CHECK-CORTEX-FIX-NEXT:    str r6, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    b .LBB83_3
; CHECK-CORTEX-FIX-NEXT:  .LBB83_2:
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
; CHECK-CORTEX-FIX-NEXT:    ldrh r11, [r1, #12]
; CHECK-CORTEX-FIX-NEXT:    ldrh r7, [r1, #14]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #24] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #2]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #4]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #6]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #8]
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1, #10]
; CHECK-CORTEX-FIX-NEXT:  .LBB83_3:
; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_5
; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r3, d2[1]
; CHECK-CORTEX-FIX-NEXT:    vmov.16 d2[0], r2
; CHECK-CORTEX-FIX-NEXT:    vmov r4, r6, d3
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r4
; CHECK-CORTEX-FIX-NEXT:    lsr r4, r4, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r6
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r6, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT:    vmov.32 r2, d2[0]
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    b .LBB83_6
; CHECK-CORTEX-FIX-NEXT:  .LBB83_5:
; CHECK-CORTEX-FIX-NEXT:    vmov r2, r3, d2
; CHECK-CORTEX-FIX-NEXT:    uxth r0, r2
; CHECK-CORTEX-FIX-NEXT:    lsr r9, r2, #16
; CHECK-CORTEX-FIX-NEXT:    uxth r5, r3
; CHECK-CORTEX-FIX-NEXT:    lsr r12, r3, #16
; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT:    mov r0, r7
; CHECK-CORTEX-FIX-NEXT:    vmov r6, r7, d3
; CHECK-CORTEX-FIX-NEXT:    uxth r10, r6
; CHECK-CORTEX-FIX-NEXT:    lsr r4, r6, #16
; CHECK-CORTEX-FIX-NEXT:    uxth lr, r7
; CHECK-CORTEX-FIX-NEXT:    lsr r8, r7, #16
; CHECK-CORTEX-FIX-NEXT:    mov r7, r0
; CHECK-CORTEX-FIX-NEXT:  .LBB83_6:
; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r11, r11, r7, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp, #20] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r10, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r2, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r2, r3, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r3
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r2
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r11
; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r9, lsl #16
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r6
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r4
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r5
; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #28
; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
br i1 %0, label %5, label %11

; %0 true: load the vector from %3 and insert the half's bits at lane 0.
5:
%6 = bitcast <16 x i8>* %3 to <8 x i16>*
%7 = load <8 x i16>, <8 x i16>* %6, align 8
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <8 x half>
br label %14

; %0 false: take the vector from memory unmodified.
11:
%12 = bitcast <16 x i8>* %3 to <8 x half>*
%13 = load <8 x half>, <8 x half>* %12, align 8
br label %14

; First AES operand: phi of the modified/unmodified loaded vector.
14:
%15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
br i1 %0, label %16, label %21

; %0 true: insert the same bits into lane 0 of the vector argument %2.
16:
%17 = bitcast <16 x i8> %2 to <8 x i16>
%18 = bitcast half %1 to i16
%19 = insertelement <8 x i16> %17, i16 %18, i64 0
%20 = bitcast <8 x i16> %19 to <8 x half>
br label %23

; %0 false: use argument %2 unmodified.
21:
%22 = bitcast <16 x i8> %2 to <8 x half>
br label %23

; Second AES operand phi, then the fused aesd/aesimc pair under test.
23:
%24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
%25 = bitcast <8 x half> %15 to <16 x i8>
%26 = bitcast <8 x half> %24 to <16 x i8>
%27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
%28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
store <16 x i8> %28, <16 x i8>* %3, align 8
ret void
}
|  |  | 
; Loop variant, f16 via pointer: the lane-modified key vector (%9) is
; loop-invariant and feeds aesd on every iteration. The CHECK lines show a
; single `vorr q0, q0, q0` at function entry protecting the value before the
; `vmov.16 d0[0]` lane write, so no per-iteration guard is needed inside
; .LBB84_2.
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    ldrh r1, [r1]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    strh r1, [r2]
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB84_1:
; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:  .LBB84_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    bne .LBB84_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; Build the loop-invariant key: %2 with the i16 loaded through %1 at lane 0.
%5 = bitcast half* %1 to i16*
%6 = load i16, i16* %5, align 2
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = insertelement <8 x i16> %7, i16 %6, i64 0
%9 = bitcast <8 x i16> %8 to <16 x i8>
; Also store the loaded i16 to the first bytes of the output buffer.
%10 = bitcast <16 x i8>* %3 to i16*
store i16 %6, i16* %10, align 8
%11 = icmp eq i32 %0, 0
br i1 %11, label %15, label %12

; Loop preheader: load the initial data vector.
12:
%13 = load <16 x i8>, <16 x i8>* %3, align 8
br label %16

; Loop exit: store the final chained result.
14:
store <16 x i8> %20, <16 x i8>* %3, align 8
br label %15

15:
ret void

; Loop body: chain aesd/aesimc %0 times with the invariant key %9.
16:
%17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
%18 = phi i32 [ 0, %12 ], [ %21, %16 ]
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
%21 = add nuw i32 %18, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %14, label %16
}
|  |  | 
; Loop variant, f16 via value: every iteration reloads the data vector,
; re-inserts the half's bits at lane 0 of both operands, and stores the
; result. The CHECK lines show `vorr q1, q1, q1` at entry guarding the key
; before its `vmov.16 d2[0]` lane write; the in-loop q8 input comes from a
; 128-bit vld1.64 (safe) followed by the lane write.
define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bxeq lr
; CHECK-FIX-NEXT:  .LBB85_1:
; CHECK-FIX-NEXT:    vmov r2, s0
; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
; CHECK-FIX-NEXT:  .LBB85_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    subs r0, r0, #1
; CHECK-FIX-NEXT:    vmov.16 d16[0], r2
; CHECK-FIX-NEXT:    aesd.8 q8, q1
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bne .LBB85_2
; CHECK-FIX-NEXT:  @ %bb.3:
; CHECK-FIX-NEXT:    bx lr
%5 = icmp eq i32 %0, 0
br i1 %5, label %13, label %6

; Loop preheader: build the loop-invariant key from %2 and the half's bits.
6:
%7 = bitcast <16 x i8> %2 to <8 x i16>
%8 = bitcast half %1 to i16
%9 = insertelement <8 x i16> %7, i16 %8, i64 0
%10 = bitcast <8 x i16> %9 to <16 x i8>
%11 = bitcast <16 x i8>* %3 to <8 x i16>*
%12 = bitcast <16 x i8>* %3 to half*
br label %14

13:
ret void

; Loop body: reload the data, insert the half at lane 0, store the half to
; memory, then run the fused aesd/aesimc pair and store the result.
14:
%15 = phi i32 [ 0, %6 ], [ %21, %14 ]
%16 = load <8 x i16>, <8 x i16>* %11, align 8
%17 = insertelement <8 x i16> %16, i16 %8, i64 0
%18 = bitcast <8 x i16> %17 to <16 x i8>
store half %1, half* %12, align 8
%19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
%20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
store <16 x i8> %20, <16 x i8>* %3, align 8
%21 = add nuw i32 %15, 1
%22 = icmp eq i32 %21, %0
br i1 %22, label %13, label %14
}
|  |  | 
; f32 set via pointer: a float is loaded (vldr s0 — a 32-bit write to just
; one S register, not a full 64/128-bit Neon write) and inserted into lane 0
; of both AES operands. The CHECK lines show both q0 and q1 guarded with
; `vorr` before the aesd.
define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vldr s0, [r0]
; CHECK-FIX-NEXT:    vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    aesd.8 q1, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q1
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT:    bx lr
; Load the float and insert it at lane 0 of the vector loaded from %2.
%4 = load float, float* %0, align 4
%5 = bitcast <16 x i8>* %2 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %4, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
; Insert the same float at lane 0 of the vector argument %1.
%9 = bitcast <16 x i8> %1 to <4 x float>
%10 = insertelement <4 x float> %9, float %4, i64 0
%11 = bitcast <4 x float> %10 to <16 x i8>
; Fused AES pair under test; result stored back through %2.
%12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
%13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
store <16 x i8> %13, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_via_val:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NEXT:    vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT:    vmov.f32 s0, s4
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NEXT:    aesd.8 q0, q1
; CHECK-FIX-NEXT:    aesimc.8 q8, q0
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT:    bx lr
; Same as aesd_setf32_via_ptr but the float arrives by value (in s0). Lane-0
; insertion into both aesd operands lowers to vmov.f32 single-lane writes, so
; both q0 and q1 must be protected with vorr before aesd.8 (see CHECKs above).
%4 = bitcast <16 x i8>* %2 to <4 x float>*
%5 = load <4 x float>, <4 x float>* %4, align 8
%6 = insertelement <4 x float> %5, float %0, i64 0
%7 = bitcast <4 x float> %6 to <16 x i8>
%8 = bitcast <16 x i8> %1 to <4 x float>
%9 = insertelement <4 x float> %8, float %0, i64 0
%10 = bitcast <4 x float> %9 to <16 x i8>
%11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
%12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
store <16 x i8> %12, <16 x i8>* %2, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
; CHECK-FIX:       @ %bb.0:
; CHECK-FIX-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB88_2
; CHECK-FIX-NEXT:  @ %bb.1:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    bne .LBB88_3
; CHECK-FIX-NEXT:    b .LBB88_4
; CHECK-FIX-NEXT:  .LBB88_2:
; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    cmp r0, #0
; CHECK-FIX-NEXT:    beq .LBB88_4
; CHECK-FIX-NEXT:  .LBB88_3:
; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT:  .LBB88_4:
; CHECK-FIX-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT:    bx lr
; The lane-0 insert happens only on the %0-true paths, so at the join blocks
; the operands may or may not have been touched by a scalar write. The CHECK
; lines above show a single vorr guard on q0 (the incoming argument register,
; whose producer is unknown) at function entry, before any aesd.8.
br i1 %0, label %5, label %10

5:
%6 = load float, float* %1, align 4
%7 = bitcast <16 x i8>* %3 to <4 x float>*
%8 = load <4 x float>, <4 x float>* %7, align 8
%9 = insertelement <4 x float> %8, float %6, i64 0
br label %13

10:
%11 = bitcast <16 x i8>* %3 to <4 x float>*
%12 = load <4 x float>, <4 x float>* %11, align 8
br label %13

13:
%14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
br i1 %0, label %15, label %19

15:
%16 = load float, float* %1, align 4
%17 = bitcast <16 x i8> %2 to <4 x float>
%18 = insertelement <4 x float> %17, float %16, i64 0
br label %21

19:
%20 = bitcast <16 x i8> %2 to <4 x float>
br label %21

21:
%22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
%23 = bitcast <4 x float> %14 to <16 x i8>
%24 = bitcast <4 x float> %22 to <16 x i8>
%25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
%26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
store <16 x i8> %26, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; The selects lower to predicated vmovne.f32 lane writes, which conditionally
; leave each aesd operand with a 32-bit scalar write as its last def. Both
; CHECK prefixes expect a vorr guard on each operand (q2 and q1) before
; aesd.8; the two prefixes differ only in instruction scheduling.
%5 = bitcast <16 x i8>* %3 to <4 x float>*
%6 = load <4 x float>, <4 x float>* %5, align 8
%7 = insertelement <4 x float> %6, float %1, i64 0
%8 = select i1 %0, <4 x float> %7, <4 x float> %6
%9 = bitcast <16 x i8> %2 to <4 x float>
%10 = insertelement <4 x float> %9, float %1, i64 0
%11 = select i1 %0, <4 x float> %10, <4 x float> %9
%12 = bitcast <4 x float> %8 to <16 x i8>
%13 = bitcast <4 x float> %11 to <16 x i8>
%14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
%15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
store <16 x i8> %15, <16 x i8>* %3, align 8
ret void
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vldr s4, [r1]
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    vstr s4, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB90_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:  .LBB90_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB90_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    vldr s4, [r1]
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    vstr s4, [r2]
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB90_1:
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT:  .LBB90_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT:    bne .LBB90_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; The loop-invariant key %8 has a scalar float (from memory, via %1) inserted
; into lane 0 before the loop. The CHECK lines above place the vorr guard for
; q0 inside the loop body (.LBB90_2), so it executes before every aesd.8.
%5 = load float, float* %1, align 4
%6 = bitcast <16 x i8> %2 to <4 x float>
%7 = insertelement <4 x float> %6, float %5, i64 0
%8 = bitcast <4 x float> %7 to <16 x i8>
%9 = bitcast <16 x i8>* %3 to float*
store float %5, float* %9, align 8
%10 = icmp eq i32 %0, 0
br i1 %10, label %14, label %11

11:
%12 = load <16 x i8>, <16 x i8>* %3, align 8
br label %15

13:
store <16 x i8> %19, <16 x i8>* %3, align 8
br label %14

14:
ret void

15:
%16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
%17 = phi i32 [ 0, %11 ], [ %20, %15 ]
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
%20 = add nuw i32 %17, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %13, label %15
}
|  |  | 
define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
; CHECK-FIX-NOSCHED-NEXT:  .LBB91_1:
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT:  .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT:    bne .LBB91_2
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT:    bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
; CHECK-CORTEX-FIX-NEXT:    bxeq lr
; CHECK-CORTEX-FIX-NEXT:  .LBB91_1:
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT:  .LBB91_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT:    aesd.8 q2, q1
; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT:    bne .LBB91_2
; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
; CHECK-CORTEX-FIX-NEXT:    bx lr
; Each loop iteration reloads the data vector from memory and re-inserts the
; by-value float %1 into lane 0 (a vmov.f32 single-lane write), and the key %9
; also carries a lane insert. The CHECK lines above expect vorr guards on both
; q2 and q1 inside the loop body, before every aesd.8.
%5 = icmp eq i32 %0, 0
br i1 %5, label %12, label %6

6:
%7 = bitcast <16 x i8> %2 to <4 x float>
%8 = insertelement <4 x float> %7, float %1, i64 0
%9 = bitcast <4 x float> %8 to <16 x i8>
%10 = bitcast <16 x i8>* %3 to <4 x float>*
%11 = bitcast <16 x i8>* %3 to float*
br label %13

12:
ret void

13:
%14 = phi i32 [ 0, %6 ], [ %20, %13 ]
%15 = load <4 x float>, <4 x float>* %10, align 8
%16 = insertelement <4 x float> %15, float %1, i64 0
%17 = bitcast <4 x float> %16 to <16 x i8>
store float %1, float* %11, align 8
%18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
%19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
store <16 x i8> %19, <16 x i8>* %3, align 8
%20 = add nuw i32 %14, 1
%21 = icmp eq i32 %20, %0
br i1 %21, label %12, label %13
}
|  |  | 
define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
; CHECK-FIX-NOSCHED:       @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NOSCHED-NEXT:    adr r1, .LCPI92_0
; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-FIX-NOSCHED-NEXT:    aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NOSCHED-NEXT:    bx lr
; CHECK-FIX-NOSCHED-NEXT:    .p2align 4
; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT:  .LCPI92_0:
; CHECK-FIX-NOSCHED-NEXT:    .byte 0 @ 0x0
; CHECK-FIX-NOSCHED-NEXT:    .byte 1 @ 0x1
; CHECK-FIX-NOSCHED-NEXT:    .byte 2 @ 0x2
; CHECK-FIX-NOSCHED-NEXT:    .byte 3 @ 0x3
; CHECK-FIX-NOSCHED-NEXT:    .byte 4 @ 0x4
; CHECK-FIX-NOSCHED-NEXT:    .byte 5 @ 0x5
; CHECK-FIX-NOSCHED-NEXT:    .byte 6 @ 0x6
; CHECK-FIX-NOSCHED-NEXT:    .byte 7 @ 0x7
; CHECK-FIX-NOSCHED-NEXT:    .byte 8 @ 0x8
; CHECK-FIX-NOSCHED-NEXT:    .byte 9 @ 0x9
; CHECK-FIX-NOSCHED-NEXT:    .byte 10 @ 0xa
; CHECK-FIX-NOSCHED-NEXT:    .byte 11 @ 0xb
; CHECK-FIX-NOSCHED-NEXT:    .byte 12 @ 0xc
; CHECK-FIX-NOSCHED-NEXT:    .byte 13 @ 0xd
; CHECK-FIX-NOSCHED-NEXT:    .byte 14 @ 0xe
; CHECK-FIX-NOSCHED-NEXT:    .byte 15 @ 0xf
;
; CHECK-CORTEX-FIX-LABEL: aese_constantisland:
; CHECK-CORTEX-FIX:       @ %bb.0:
; CHECK-CORTEX-FIX-NEXT:    adr r1, .LCPI92_0
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-CORTEX-FIX-NEXT:    bx lr
; CHECK-CORTEX-FIX-NEXT:    .p2align 4
; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
; CHECK-CORTEX-FIX-NEXT:  .LCPI92_0:
; CHECK-CORTEX-FIX-NEXT:    .byte 0 @ 0x0
; CHECK-CORTEX-FIX-NEXT:    .byte 1 @ 0x1
; CHECK-CORTEX-FIX-NEXT:    .byte 2 @ 0x2
; CHECK-CORTEX-FIX-NEXT:    .byte 3 @ 0x3
; CHECK-CORTEX-FIX-NEXT:    .byte 4 @ 0x4
; CHECK-CORTEX-FIX-NEXT:    .byte 5 @ 0x5
; CHECK-CORTEX-FIX-NEXT:    .byte 6 @ 0x6
; CHECK-CORTEX-FIX-NEXT:    .byte 7 @ 0x7
; CHECK-CORTEX-FIX-NEXT:    .byte 8 @ 0x8
; CHECK-CORTEX-FIX-NEXT:    .byte 9 @ 0x9
; CHECK-CORTEX-FIX-NEXT:    .byte 10 @ 0xa
; CHECK-CORTEX-FIX-NEXT:    .byte 11 @ 0xb
; CHECK-CORTEX-FIX-NEXT:    .byte 12 @ 0xc
; CHECK-CORTEX-FIX-NEXT:    .byte 13 @ 0xd
; CHECK-CORTEX-FIX-NEXT:    .byte 14 @ 0xe
; CHECK-CORTEX-FIX-NEXT:    .byte 15 @ 0xf
; The first aese operand is a <16 x i8> literal, materialized from a constant
; pool (.LCPI92_0) with a 128-bit vld1.64; the other operand is also a full
; 128-bit load. Both inputs are full Neon register writes, so the CHECK lines
; above contain no vorr guard before aese.8.
%2 = load <16 x i8>, <16 x i8>* %0, align 8
%3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
%4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
store <16 x i8> %4, <16 x i8>* %0, align 8
ret void
}