| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
 | ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE | 
 | ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE | 
 |  | 
 ; Word-aligned vector load: reads a <4 x i32> from %vp (align 4), shifts every
 ; lane right by one bit (logical) and returns the result.
 ; Both runs expect the load to be a single vldrw.u32; the big-endian run also
 ; expects a vrev64.32 after the shift, before returning.
 | define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(ptr %vp) { | 
 | ; CHECK-LE-LABEL: load_4xi32_a4: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vldrw.u32 q0, [r0] | 
 | ; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1 | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: load_4xi32_a4: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    vldrw.u32 q0, [r0] | 
 | ; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1 | 
 | ; CHECK-BE-NEXT:    vrev64.32 q0, q1 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %0 = load <4 x i32>, ptr %vp, align 4 | 
 |   %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1> | 
 |   ret <4 x i32> %1 | 
 | } | 
 |  | 
 ; Halfword-aligned vector load: same load/shift/return shape as the align-4
 ; case, but the <4 x i32> load only promises 2-byte alignment.
 ; The little-endian run expects a vldrh.u16; the big-endian run expects a
 ; byte load (vldrb.u8) followed by vrev32.8, then the shift and a vrev64.32.
 | define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a2(ptr %vp) { | 
 | ; CHECK-LE-LABEL: load_4xi32_a2: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vldrh.u16 q0, [r0] | 
 | ; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1 | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: load_4xi32_a2: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q0, [r0] | 
 | ; CHECK-BE-NEXT:    vrev32.8 q0, q0 | 
 | ; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1 | 
 | ; CHECK-BE-NEXT:    vrev64.32 q0, q1 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %0 = load <4 x i32>, ptr %vp, align 2 | 
 |   %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1> | 
 |   ret <4 x i32> %1 | 
 | } | 
 |  | 
 ; Byte-aligned vector load: same load/shift/return shape as the align-4 case,
 ; but the <4 x i32> load only promises 1-byte alignment.
 ; Both runs expect a byte load (vldrb.u8); the big-endian run additionally
 ; expects a vrev32.8 after the load and a vrev64.32 after the shift.
 | define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(ptr %vp) { | 
 | ; CHECK-LE-LABEL: load_4xi32_a1: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vldrb.u8 q0, [r0] | 
 | ; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1 | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: load_4xi32_a1: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q0, [r0] | 
 | ; CHECK-BE-NEXT:    vrev32.8 q0, q0 | 
 | ; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1 | 
 | ; CHECK-BE-NEXT:    vrev64.32 q0, q1 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %0 = load <4 x i32>, ptr %vp, align 1 | 
 |   %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1> | 
 |   ret <4 x i32> %1 | 
 | } | 
 |  | 
 ; Word-aligned vector store: shifts every lane of the incoming <4 x i32> %val
 ; right by one bit (logical) and stores the result to %vp with align 4.
 ; Both runs expect a vstrw.32; the big-endian run expects a vrev64.32 on the
 ; incoming argument before the shift.
 | define arm_aapcs_vfpcc void @store_4xi32_a4(ptr %vp, <4 x i32> %val) { | 
 | ; CHECK-LE-LABEL: store_4xi32_a4: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1 | 
 | ; CHECK-LE-NEXT:    vstrw.32 q0, [r0] | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: store_4xi32_a4: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    vrev64.32 q1, q0 | 
 | ; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1 | 
 | ; CHECK-BE-NEXT:    vstrw.32 q0, [r0] | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1> | 
 |   store <4 x i32> %0, ptr %vp, align 4 | 
 |   ret void | 
 | } | 
 |  | 
 ; Halfword-aligned vector store: same shift/store shape as the align-4 case,
 ; but the <4 x i32> store only promises 2-byte alignment.
 ; The little-endian run expects a vstrh.16; the big-endian run expects
 ; vrev64.32 on the argument, the shift, then vrev32.8 and a byte store
 ; (vstrb.8).
 | define arm_aapcs_vfpcc void @store_4xi32_a2(ptr %vp, <4 x i32> %val) { | 
 | ; CHECK-LE-LABEL: store_4xi32_a2: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1 | 
 | ; CHECK-LE-NEXT:    vstrh.16 q0, [r0] | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: store_4xi32_a2: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    vrev64.32 q1, q0 | 
 | ; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1 | 
 | ; CHECK-BE-NEXT:    vrev32.8 q0, q0 | 
 | ; CHECK-BE-NEXT:    vstrb.8 q0, [r0] | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1> | 
 |   store <4 x i32> %0, ptr %vp, align 2 | 
 |   ret void | 
 | } | 
 |  | 
 ; Byte-aligned vector store: same shift/store shape as the align-4 case, but
 ; the <4 x i32> store only promises 1-byte alignment.
 ; Both runs expect a byte store (vstrb.8); the big-endian run additionally
 ; expects vrev64.32 on the argument and a vrev32.8 between the shift and the
 ; store.
 | define arm_aapcs_vfpcc void @store_4xi32_a1(ptr %vp, <4 x i32> %val) { | 
 | ; CHECK-LE-LABEL: store_4xi32_a1: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1 | 
 | ; CHECK-LE-NEXT:    vstrb.8 q0, [r0] | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: store_4xi32_a1: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    vrev64.32 q1, q0 | 
 | ; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1 | 
 | ; CHECK-BE-NEXT:    vrev32.8 q0, q0 | 
 | ; CHECK-BE-NEXT:    vstrb.8 q0, [r0] | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1> | 
 |   store <4 x i32> %0, ptr %vp, align 1 | 
 |   ret void | 
 | } | 
 |  | 
 ; Word-aligned vector load at a positive constant offset from %ip, returned
 ; directly with no intervening arithmetic.
 ; The little-endian run expects the offset to be folded into the load as an
 ; immediate (vldrw.u32 with #508). The big-endian run expects a separate
 ; add.w of #508, then a byte load (vldrb.u8) and a vrev64.8.
 | define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_pos(ptr %ip) { | 
 | ; CHECK-LE-LABEL: load_4xi32_a4_offset_pos: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #508] | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: load_4xi32_a4_offset_pos: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    add.w r0, r0, #508 | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q1, [r0] | 
 | ; CHECK-BE-NEXT:    vrev64.8 q0, q1 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 ; 127 i32 elements past %ip, i.e. a byte offset of 127 * 4 = 508.
 |   %ipoffset = getelementptr inbounds i32, ptr %ip, i32 127 | 
 |   %0 = load <4 x i32>, ptr %ipoffset, align 4 | 
 |   ret <4 x i32> %0 | 
 | } | 
 |  | 
 ; Word-aligned vector load at a negative constant offset from %ip, returned
 ; directly with no intervening arithmetic.
 ; The little-endian run expects the offset to be folded into the load as an
 ; immediate (vldrw.u32 with #-508). The big-endian run expects a separate
 ; sub.w of #508, then a byte load (vldrb.u8) and a vrev64.8.
 | define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_neg(ptr %ip) { | 
 | ; CHECK-LE-LABEL: load_4xi32_a4_offset_neg: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #-508] | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: load_4xi32_a4_offset_neg: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    sub.w r0, r0, #508 | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q1, [r0] | 
 | ; CHECK-BE-NEXT:    vrev64.8 q0, q1 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 ; 127 i32 elements before %ip, i.e. a byte offset of -127 * 4 = -508.
 |   %ipoffset = getelementptr inbounds i32, ptr %ip, i32 -127 | 
 |   %0 = load <4 x i32>, ptr %ipoffset, align 4 | 
 |   ret <4 x i32> %0 | 
 | } | 
 |  | 
 ; Stack-relative <4 x i32> stores and load at byte offset 16 inside a 40-byte
 ; alloca (5 * 2 * 4 bytes; both runs expect a 40-byte stack adjustment).
 ; A splat of 1 is stored at the base and at offset 16, the first i32 at
 ; offset 16 is then overwritten with 3, and the vector at offset 16 is
 ; reloaded and returned.
 ; Both runs expect the offset-16 accesses to address memory as [sp, #16],
 ; while the base store goes through a copy of sp in r0. The big-endian run
 ; expects the final load as vldrb.u8 followed by vrev64.8.
 | define arm_aapcs_vfpcc <4 x i32> @loadstore_4xi32_stack_off16() { | 
 | ; CHECK-LE-LABEL: loadstore_4xi32_stack_off16: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    .pad #40 | 
 | ; CHECK-LE-NEXT:    sub sp, #40 | 
 | ; CHECK-LE-NEXT:    vmov.i32 q0, #0x1 | 
 | ; CHECK-LE-NEXT:    mov r0, sp | 
 | ; CHECK-LE-NEXT:    vstrw.32 q0, [r0] | 
 | ; CHECK-LE-NEXT:    movs r0, #3 | 
 | ; CHECK-LE-NEXT:    vstrw.32 q0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    str r0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    vldrw.u32 q0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    add sp, #40 | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: loadstore_4xi32_stack_off16: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    .pad #40 | 
 | ; CHECK-BE-NEXT:    sub sp, #40 | 
 | ; CHECK-BE-NEXT:    vmov.i32 q0, #0x1 | 
 | ; CHECK-BE-NEXT:    mov r0, sp | 
 | ; CHECK-BE-NEXT:    vstrw.32 q0, [r0] | 
 | ; CHECK-BE-NEXT:    movs r0, #3 | 
 | ; CHECK-BE-NEXT:    vstrw.32 q0, [sp, #16] | 
 | ; CHECK-BE-NEXT:    str r0, [sp, #16] | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16] | 
 | ; CHECK-BE-NEXT:    vrev64.8 q0, q1 | 
 | ; CHECK-BE-NEXT:    add sp, #40 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %c = alloca [1 x [5 x [2 x i32]]], align 4 | 
 |   store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %c, align 4 | 
 ; Element [0][2][0]: 2 inner arrays of 2 x i32 (8 bytes each) = byte offset 16.
 |   %arrayidx5.2 = getelementptr inbounds [1 x [5 x [2 x i32]]], ptr %c, i32 0, i32 0, i32 2, i32 0 | 
 |   store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %arrayidx5.2, align 4 | 
 ; Scalar store to the same address as the vector store above, then reload the
 ; whole vector from that address.
 |   store i32 3, ptr %arrayidx5.2, align 4 | 
 |   %0 = load <4 x i32>, ptr %arrayidx5.2, align 4 | 
 |   ret <4 x i32> %0 | 
 | } | 
 |  | 
 ; Stack-relative <8 x i16> stores and load at byte offset 16 inside a 40-byte
 ; alloca (10 * 2 * 2 bytes; both runs expect a 40-byte stack adjustment).
 ; A splat of 1 is stored at the base and at offset 16, the first i16 at
 ; offset 16 is then overwritten with 3, and the vector at offset 16 is
 ; reloaded and returned. All accesses are 2-byte aligned.
 ; Both runs expect the offset-16 accesses to address memory as [sp, #16],
 ; while the base store goes through a copy of sp in r0. The big-endian run
 ; expects the final load as vldrb.u8 followed by vrev64.8.
 | define arm_aapcs_vfpcc <8 x i16> @loadstore_8xi16_stack_off16() { | 
 | ; CHECK-LE-LABEL: loadstore_8xi16_stack_off16: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    .pad #40 | 
 | ; CHECK-LE-NEXT:    sub sp, #40 | 
 | ; CHECK-LE-NEXT:    vmov.i16 q0, #0x1 | 
 | ; CHECK-LE-NEXT:    mov r0, sp | 
 | ; CHECK-LE-NEXT:    vstrh.16 q0, [r0] | 
 | ; CHECK-LE-NEXT:    movs r0, #3 | 
 | ; CHECK-LE-NEXT:    vstrh.16 q0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    strh.w r0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    vldrh.u16 q0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    add sp, #40 | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: loadstore_8xi16_stack_off16: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    .pad #40 | 
 | ; CHECK-BE-NEXT:    sub sp, #40 | 
 | ; CHECK-BE-NEXT:    vmov.i16 q0, #0x1 | 
 | ; CHECK-BE-NEXT:    mov r0, sp | 
 | ; CHECK-BE-NEXT:    vstrh.16 q0, [r0] | 
 | ; CHECK-BE-NEXT:    movs r0, #3 | 
 | ; CHECK-BE-NEXT:    vstrh.16 q0, [sp, #16] | 
 | ; CHECK-BE-NEXT:    strh.w r0, [sp, #16] | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16] | 
 | ; CHECK-BE-NEXT:    vrev64.8 q0, q1 | 
 | ; CHECK-BE-NEXT:    add sp, #40 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %c = alloca [1 x [10 x [2 x i16]]], align 2 | 
 |   store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %c, align 2 | 
 ; Element [0][4][0]: 4 inner arrays of 2 x i16 (4 bytes each) = byte offset 16.
 |   %arrayidx5.2 = getelementptr inbounds [1 x [10 x [2 x i16]]], ptr %c, i32 0, i32 0, i32 4, i32 0 | 
 |   store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %arrayidx5.2, align 2 | 
 ; Scalar store to the same address as the vector store above, then reload the
 ; whole vector from that address.
 |   store i16 3, ptr %arrayidx5.2, align 2 | 
 |   %0 = load <8 x i16>, ptr %arrayidx5.2, align 2 | 
 |   ret <8 x i16> %0 | 
 | } | 
 |  | 
 ; Stack-relative <16 x i8> stores and load at byte offset 16 inside a 40-byte
 ; alloca (20 * 2 * 1 bytes; both runs expect a 40-byte stack adjustment).
 ; A splat of 1 is stored at the base and at offset 16, the first byte at
 ; offset 16 is then overwritten with 3, and the vector at offset 16 is
 ; reloaded and returned. All accesses are 1-byte aligned.
 ; Both runs expect the offset-16 accesses to address memory as [sp, #16],
 ; while the base store goes through a copy of sp in r0. The big-endian run
 ; expects the final load as vldrb.u8 followed by vrev64.8.
 | define arm_aapcs_vfpcc <16 x i8> @loadstore_16xi8_stack_off16() { | 
 | ; CHECK-LE-LABEL: loadstore_16xi8_stack_off16: | 
 | ; CHECK-LE:       @ %bb.0: @ %entry | 
 | ; CHECK-LE-NEXT:    .pad #40 | 
 | ; CHECK-LE-NEXT:    sub sp, #40 | 
 | ; CHECK-LE-NEXT:    vmov.i8 q0, #0x1 | 
 | ; CHECK-LE-NEXT:    mov r0, sp | 
 | ; CHECK-LE-NEXT:    vstrb.8 q0, [r0] | 
 | ; CHECK-LE-NEXT:    movs r0, #3 | 
 | ; CHECK-LE-NEXT:    vstrb.8 q0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    strb.w r0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    vldrb.u8 q0, [sp, #16] | 
 | ; CHECK-LE-NEXT:    add sp, #40 | 
 | ; CHECK-LE-NEXT:    bx lr | 
 | ; | 
 | ; CHECK-BE-LABEL: loadstore_16xi8_stack_off16: | 
 | ; CHECK-BE:       @ %bb.0: @ %entry | 
 | ; CHECK-BE-NEXT:    .pad #40 | 
 | ; CHECK-BE-NEXT:    sub sp, #40 | 
 | ; CHECK-BE-NEXT:    vmov.i8 q0, #0x1 | 
 | ; CHECK-BE-NEXT:    mov r0, sp | 
 | ; CHECK-BE-NEXT:    vstrb.8 q0, [r0] | 
 | ; CHECK-BE-NEXT:    movs r0, #3 | 
 | ; CHECK-BE-NEXT:    vstrb.8 q0, [sp, #16] | 
 | ; CHECK-BE-NEXT:    strb.w r0, [sp, #16] | 
 | ; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16] | 
 | ; CHECK-BE-NEXT:    vrev64.8 q0, q1 | 
 | ; CHECK-BE-NEXT:    add sp, #40 | 
 | ; CHECK-BE-NEXT:    bx lr | 
 | entry: | 
 |   %c = alloca [1 x [20 x [2 x i8]]], align 1 | 
 |   store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %c, align 1 | 
 ; Element [0][8][0]: 8 inner arrays of 2 x i8 (2 bytes each) = byte offset 16.
 |   %arrayidx5.2 = getelementptr inbounds [1 x [20 x [2 x i8]]], ptr %c, i32 0, i32 0, i32 8, i32 0 | 
 |   store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %arrayidx5.2, align 1 | 
 ; Scalar store to the same address as the vector store above, then reload the
 ; whole vector from that address.
 |   store i8 3, ptr %arrayidx5.2, align 1 | 
 |   %0 = load <16 x i8>, ptr %arrayidx5.2, align 1 | 
 |   ret <16 x i8> %0 | 
 | } |