| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - -opaque-pointers | FileCheck %s |
| |
| ; Gather/scatter addressed through two pointer induction variables (i32 data). |
| ; Each iteration gathers four i32 values from %A at element offsets 0, 4, 8, 12, |
| ; adds a splat of %y and scatters the sums to %B at the same offsets; both |
| ; pointers then advance by 16 elements. The expected assembly keeps the offsets |
| ; in a single vector register (scaled by uxtw #2) and bumps each base by 64. |
| define void @ptr_iv_v4i32(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) { |
| ; CHECK-LABEL: ptr_iv_v4i32: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr r3, .LCPI0_0 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: .LBB0_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.i32 q1, q1, r2 |
| ; CHECK-NEXT: vstrw.32 q1, [r1, q0, uxtw #2] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB0_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI0_0: |
| ; CHECK-NEXT: .long 0 @ 0x0 |
| ; CHECK-NEXT: .long 4 @ 0x4 |
| ; CHECK-NEXT: .long 8 @ 0x8 |
| ; CHECK-NEXT: .long 12 @ 0xc |
| vector.ph: |
| ; Splat %y across all four lanes. |
| %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i32* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi i32* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; %0 / %2 are the next-iteration pointers; %1 / %3 are the per-lane addresses. |
| %0 = getelementptr i32, i32* %pointer.phi, i32 16 |
| %1 = getelementptr i32, i32* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %2 = getelementptr i32, i32* %pointer.phi13, i32 16 |
| %3 = getelementptr i32, i32* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) |
| %4 = add nsw <4 x i32> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %4, <4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| ; %index steps by 4 from 0 until it reaches 996, i.e. 249 iterations; this is |
| ; the count loaded into lr in the expected assembly. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; Pointer-IV gather/scatter (i32 data) where a further constant GEP is applied |
| ; to the vector of pointers: +3 elements for the gather, +5 for the scatter. |
| ; The expected assembly folds those constants into byte-offset constant pools: |
| ; (0,4,8,12 + 3) * 4 = 12,28,44,60 and (0,4,8,12 + 5) * 4 = 20,36,52,68. |
| define void @ptr_iv_v4i32_mult(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i32 %y) { |
| ; CHECK-LABEL: ptr_iv_v4i32_mult: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr r1, .LCPI1_0 |
| ; CHECK-NEXT: adr r3, .LCPI1_1 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: vldrw.u32 q1, [r1] |
| ; CHECK-NEXT: .LBB1_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrw.u32 q2, [r0, q0] |
| ; CHECK-NEXT: vadd.i32 q2, q2, r2 |
| ; CHECK-NEXT: vstrw.32 q2, [r0, q1] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: le lr, .LBB1_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI1_0: |
| ; CHECK-NEXT: .long 20 @ 0x14 |
| ; CHECK-NEXT: .long 36 @ 0x24 |
| ; CHECK-NEXT: .long 52 @ 0x34 |
| ; CHECK-NEXT: .long 68 @ 0x44 |
| ; CHECK-NEXT: .LCPI1_1: |
| ; CHECK-NEXT: .long 12 @ 0xc |
| ; CHECK-NEXT: .long 28 @ 0x1c |
| ; CHECK-NEXT: .long 44 @ 0x2c |
| ; CHECK-NEXT: .long 60 @ 0x3c |
| vector.ph: |
| ; Splat %y across all four lanes. |
| %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %y, i32 0 |
| %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i32* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi i32* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| %0 = getelementptr i32, i32* %pointer.phi, i32 16 |
| %1 = getelementptr i32, i32* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %gather.address = getelementptr i32, <4 x i32*> %1, i32 3 |
| %2 = getelementptr i32, i32* %pointer.phi13, i32 16 |
| %3 = getelementptr i32, i32* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| ; NOTE(review): the scatter address is built from %1 (the %A-based pointers), |
| ; not %3, so %3 is unused and the expected store is based on r0, i.e. it writes |
| ; through the readonly %A argument. @ptr_iv_v8i16_mult uses %3 at this point; |
| ; confirm whether %1 is intentional. Changing it requires regenerating the |
| ; expected assembly. |
| %scatter.address = getelementptr i32, <4 x i32*> %1, i32 5 |
| %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gather.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) |
| %4 = add nsw <4 x i32> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %4, <4 x i32*> %scatter.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; i16 pointer-IV gather/scatter: eight lanes at element offsets 0, 4, ..., 28, |
| ; with both pointers advancing by 32 elements per iteration. The expected |
| ; assembly uses vldrh.u16 / vstrh.16 with the offsets scaled by uxtw #1 and |
| ; bumps each scalar base by 64. |
| define void @ptr_iv_v8i16(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i16 %y) { |
| ; CHECK-LABEL: ptr_iv_v8i16: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr r3, .LCPI2_0 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: .LBB2_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.i16 q1, q1, r2 |
| ; CHECK-NEXT: vstrh.16 q1, [r1, q0, uxtw #1] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB2_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI2_0: |
| ; CHECK-NEXT: .short 0 @ 0x0 |
| ; CHECK-NEXT: .short 4 @ 0x4 |
| ; CHECK-NEXT: .short 8 @ 0x8 |
| ; CHECK-NEXT: .short 12 @ 0xc |
| ; CHECK-NEXT: .short 16 @ 0x10 |
| ; CHECK-NEXT: .short 20 @ 0x14 |
| ; CHECK-NEXT: .short 24 @ 0x18 |
| ; CHECK-NEXT: .short 28 @ 0x1c |
| vector.ph: |
| ; Splat %y across all eight lanes. |
| %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i16* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi i16* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; %0 / %2 are the next-iteration pointers; %1 / %3 are the per-lane addresses. |
| %0 = getelementptr i16, i16* %pointer.phi, i32 32 |
| %1 = getelementptr i16, i16* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| %2 = getelementptr i16, i16* %pointer.phi13, i32 32 |
| %3 = getelementptr i16, i16* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| %wide.masked.gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef) |
| %4 = add nsw <8 x i16> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %4, <8 x i16*> %3, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| |
| ; i16 pointer-IV gather/scatter with an extra constant GEP on the vector of |
| ; pointers: +3 elements on the %A side (gather), +5 on the %B side (scatter). |
| ; The expected assembly folds the constants into byte-offset constant pools: |
| ; (0,4,...,28 + 3) * 2 = 6,14,...,62 and (0,4,...,28 + 5) * 2 = 10,18,...,66. |
| define void @ptr_iv_v8i16_mult(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i16 %y) { |
| ; CHECK-LABEL: ptr_iv_v8i16_mult: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr.w r12, .LCPI3_0 |
| ; CHECK-NEXT: adr r3, .LCPI3_1 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: vldrw.u32 q1, [r12] |
| ; CHECK-NEXT: .LBB3_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrh.u16 q2, [r0, q0] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.i16 q2, q2, r2 |
| ; CHECK-NEXT: vstrh.16 q2, [r1, q1] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB3_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI3_0: |
| ; CHECK-NEXT: .short 10 @ 0xa |
| ; CHECK-NEXT: .short 18 @ 0x12 |
| ; CHECK-NEXT: .short 26 @ 0x1a |
| ; CHECK-NEXT: .short 34 @ 0x22 |
| ; CHECK-NEXT: .short 42 @ 0x2a |
| ; CHECK-NEXT: .short 50 @ 0x32 |
| ; CHECK-NEXT: .short 58 @ 0x3a |
| ; CHECK-NEXT: .short 66 @ 0x42 |
| ; CHECK-NEXT: .LCPI3_1: |
| ; CHECK-NEXT: .short 6 @ 0x6 |
| ; CHECK-NEXT: .short 14 @ 0xe |
| ; CHECK-NEXT: .short 22 @ 0x16 |
| ; CHECK-NEXT: .short 30 @ 0x1e |
| ; CHECK-NEXT: .short 38 @ 0x26 |
| ; CHECK-NEXT: .short 46 @ 0x2e |
| ; CHECK-NEXT: .short 54 @ 0x36 |
| ; CHECK-NEXT: .short 62 @ 0x3e |
| vector.ph: |
| ; Splat %y across all eight lanes. |
| %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %y, i32 0 |
| %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i16* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi i16* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| %0 = getelementptr i16, i16* %pointer.phi, i32 32 |
| %1 = getelementptr i16, i16* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| ; Gather reads from the %A-based lane pointers displaced by 3 elements. |
| %gather.address = getelementptr i16, <8 x i16*> %1, i16 3 |
| %2 = getelementptr i16, i16* %pointer.phi13, i32 32 |
| %3 = getelementptr i16, i16* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| ; Scatter writes to the %B-based lane pointers displaced by 5 elements. |
| %scatter.address = getelementptr i16, <8 x i16*> %3, i16 5 |
| %wide.masked.gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gather.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef) |
| %4 = add nsw <8 x i16> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %4, <8 x i16*> %scatter.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; i8 pointer-IV gather/scatter: sixteen lanes at byte offsets 0, 4, ..., 60, |
| ; with both pointers advancing by 64 bytes per iteration. The expected assembly |
| ; uses vldrb.u8 / vstrb.8 with an unscaled offset vector and bumps each scalar |
| ; base by 64. |
| define void @ptr_iv_v16i8(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i8 %y) { |
| ; CHECK-LABEL: ptr_iv_v16i8: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr r3, .LCPI4_0 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: .LBB4_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrb.u8 q1, [r0, q0] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.i8 q1, q1, r2 |
| ; CHECK-NEXT: vstrb.8 q1, [r1, q0] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB4_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI4_0: |
| ; CHECK-NEXT: .byte 0 @ 0x0 |
| ; CHECK-NEXT: .byte 4 @ 0x4 |
| ; CHECK-NEXT: .byte 8 @ 0x8 |
| ; CHECK-NEXT: .byte 12 @ 0xc |
| ; CHECK-NEXT: .byte 16 @ 0x10 |
| ; CHECK-NEXT: .byte 20 @ 0x14 |
| ; CHECK-NEXT: .byte 24 @ 0x18 |
| ; CHECK-NEXT: .byte 28 @ 0x1c |
| ; CHECK-NEXT: .byte 32 @ 0x20 |
| ; CHECK-NEXT: .byte 36 @ 0x24 |
| ; CHECK-NEXT: .byte 40 @ 0x28 |
| ; CHECK-NEXT: .byte 44 @ 0x2c |
| ; CHECK-NEXT: .byte 48 @ 0x30 |
| ; CHECK-NEXT: .byte 52 @ 0x34 |
| ; CHECK-NEXT: .byte 56 @ 0x38 |
| ; CHECK-NEXT: .byte 60 @ 0x3c |
| vector.ph: ; entry block: splat %y across all sixteen lanes |
| %broadcast.splatinsert = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i8* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi i8* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; %0 / %2 are the next-iteration pointers; %1 / %3 are the per-lane addresses. |
| %0 = getelementptr i8, i8* %pointer.phi, i32 64 |
| %1 = getelementptr i8, i8* %pointer.phi, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60> |
| %2 = getelementptr i8, i8* %pointer.phi13, i32 64 |
| %3 = getelementptr i8, i8* %pointer.phi13, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60> |
| %wide.masked.gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %1, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) |
| %4 = add nsw <16 x i8> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %4, <16 x i8*> %3, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| |
| ; i8 pointer-IV gather/scatter with an extra constant GEP on the vector of |
| ; pointers: +3 bytes on the %A side (gather), +5 on the %B side (scatter). |
| ; The expected assembly folds the constants into the offset constant pools: |
| ; 0,4,...,60 + 3 = 3,7,...,63 and 0,4,...,60 + 5 = 5,9,...,65. |
| define void @ptr_iv_v16i8_mult(i8* noalias nocapture readonly %A, i8* noalias nocapture %B, i8 %y) { |
| ; CHECK-LABEL: ptr_iv_v16i8_mult: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr.w r12, .LCPI5_0 |
| ; CHECK-NEXT: adr r3, .LCPI5_1 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: vldrw.u32 q1, [r12] |
| ; CHECK-NEXT: .LBB5_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrb.u8 q2, [r0, q0] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.i8 q2, q2, r2 |
| ; CHECK-NEXT: vstrb.8 q2, [r1, q1] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB5_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI5_0: |
| ; CHECK-NEXT: .byte 5 @ 0x5 |
| ; CHECK-NEXT: .byte 9 @ 0x9 |
| ; CHECK-NEXT: .byte 13 @ 0xd |
| ; CHECK-NEXT: .byte 17 @ 0x11 |
| ; CHECK-NEXT: .byte 21 @ 0x15 |
| ; CHECK-NEXT: .byte 25 @ 0x19 |
| ; CHECK-NEXT: .byte 29 @ 0x1d |
| ; CHECK-NEXT: .byte 33 @ 0x21 |
| ; CHECK-NEXT: .byte 37 @ 0x25 |
| ; CHECK-NEXT: .byte 41 @ 0x29 |
| ; CHECK-NEXT: .byte 45 @ 0x2d |
| ; CHECK-NEXT: .byte 49 @ 0x31 |
| ; CHECK-NEXT: .byte 53 @ 0x35 |
| ; CHECK-NEXT: .byte 57 @ 0x39 |
| ; CHECK-NEXT: .byte 61 @ 0x3d |
| ; CHECK-NEXT: .byte 65 @ 0x41 |
| ; CHECK-NEXT: .LCPI5_1: |
| ; CHECK-NEXT: .byte 3 @ 0x3 |
| ; CHECK-NEXT: .byte 7 @ 0x7 |
| ; CHECK-NEXT: .byte 11 @ 0xb |
| ; CHECK-NEXT: .byte 15 @ 0xf |
| ; CHECK-NEXT: .byte 19 @ 0x13 |
| ; CHECK-NEXT: .byte 23 @ 0x17 |
| ; CHECK-NEXT: .byte 27 @ 0x1b |
| ; CHECK-NEXT: .byte 31 @ 0x1f |
| ; CHECK-NEXT: .byte 35 @ 0x23 |
| ; CHECK-NEXT: .byte 39 @ 0x27 |
| ; CHECK-NEXT: .byte 43 @ 0x2b |
| ; CHECK-NEXT: .byte 47 @ 0x2f |
| ; CHECK-NEXT: .byte 51 @ 0x33 |
| ; CHECK-NEXT: .byte 55 @ 0x37 |
| ; CHECK-NEXT: .byte 59 @ 0x3b |
| ; CHECK-NEXT: .byte 63 @ 0x3f |
| vector.ph: ; entry block: splat %y across all sixteen lanes |
| %broadcast.splatinsert = insertelement <16 x i8> undef, i8 %y, i32 0 |
| %broadcast.splat = shufflevector <16 x i8> %broadcast.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i8* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi i8* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| %0 = getelementptr i8, i8* %pointer.phi, i32 64 |
| %1 = getelementptr i8, i8* %pointer.phi, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60> |
| ; Gather reads from the %A-based lane pointers displaced by 3 bytes. |
| %gather.address = getelementptr i8, <16 x i8*> %1, i8 3 |
| %2 = getelementptr i8, i8* %pointer.phi13, i32 64 |
| %3 = getelementptr i8, i8* %pointer.phi13, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60> |
| ; Scatter writes to the %B-based lane pointers displaced by 5 bytes. |
| %scatter.address = getelementptr i8, <16 x i8*> %3, i8 5 |
| %wide.masked.gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gather.address, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) |
| %4 = add nsw <16 x i8> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %4, <16 x i8*> %scatter.address, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; float pointer-IV gather/scatter: four lanes at element offsets 0, 4, 8, 12, |
| ; with both pointers advancing by 16 elements per iteration and an fadd of the |
| ; splatted %y in between. The expected assembly matches the i32 form except |
| ; that the add is vadd.f32 with r2 as its scalar operand. |
| define void @ptr_iv_v4f32(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) { |
| ; CHECK-LABEL: ptr_iv_v4f32: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr r3, .LCPI6_0 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: .LBB6_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.f32 q1, q1, r2 |
| ; CHECK-NEXT: vstrw.32 q1, [r1, q0, uxtw #2] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB6_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI6_0: |
| ; CHECK-NEXT: .long 0 @ 0x0 |
| ; CHECK-NEXT: .long 4 @ 0x4 |
| ; CHECK-NEXT: .long 8 @ 0x8 |
| ; CHECK-NEXT: .long 12 @ 0xc |
| vector.ph: ; entry block: splat %y across all four lanes |
| %broadcast.splatinsert = insertelement <4 x float> undef, float %y, i32 0 |
| %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi float* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi float* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; %0 / %2 are the next-iteration pointers; %1 / %3 are the per-lane addresses. |
| %0 = getelementptr float, float* %pointer.phi, i32 16 |
| %1 = getelementptr float, float* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %2 = getelementptr float, float* %pointer.phi13, i32 16 |
| %3 = getelementptr float, float* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %wide.masked.gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) |
| %4 = fadd <4 x float> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %4, <4 x float*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; float pointer-IV gather/scatter where a further constant GEP is applied to |
| ; the vector of pointers: +3 elements for the gather, +5 for the scatter. |
| ; The expected assembly folds those constants into byte-offset constant pools: |
| ; (0,4,8,12 + 3) * 4 = 12,28,44,60 and (0,4,8,12 + 5) * 4 = 20,36,52,68. |
| define void @ptr_iv_v4f32_mult(float* noalias nocapture readonly %A, float* noalias nocapture %B, float %y) { |
| ; CHECK-LABEL: ptr_iv_v4f32_mult: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: adr r1, .LCPI7_0 |
| ; CHECK-NEXT: adr r3, .LCPI7_1 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: vldrw.u32 q1, [r1] |
| ; CHECK-NEXT: .LBB7_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrw.u32 q2, [r0, q0] |
| ; CHECK-NEXT: vadd.f32 q2, q2, r2 |
| ; CHECK-NEXT: vstrw.32 q2, [r0, q1] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: le lr, .LBB7_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI7_0: |
| ; CHECK-NEXT: .long 20 @ 0x14 |
| ; CHECK-NEXT: .long 36 @ 0x24 |
| ; CHECK-NEXT: .long 52 @ 0x34 |
| ; CHECK-NEXT: .long 68 @ 0x44 |
| ; CHECK-NEXT: .LCPI7_1: |
| ; CHECK-NEXT: .long 12 @ 0xc |
| ; CHECK-NEXT: .long 28 @ 0x1c |
| ; CHECK-NEXT: .long 44 @ 0x2c |
| ; CHECK-NEXT: .long 60 @ 0x3c |
| vector.ph: ; entry block: splat %y across all four lanes |
| %broadcast.splatinsert = insertelement <4 x float> undef, float %y, i32 0 |
| %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi float* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi float* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| %0 = getelementptr float, float* %pointer.phi, i32 16 |
| %1 = getelementptr float, float* %pointer.phi, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| %gather.address = getelementptr float, <4 x float*> %1, i32 3 |
| %2 = getelementptr float, float* %pointer.phi13, i32 16 |
| %3 = getelementptr float, float* %pointer.phi13, <4 x i32> <i32 0, i32 4, i32 8, i32 12> |
| ; NOTE(review): the scatter address is built from %1 (the %A-based pointers), |
| ; not %3, so %3 is unused and the expected store is based on r0, i.e. it writes |
| ; through the readonly %A argument. @ptr_iv_v8i16_mult uses %3 at this point; |
| ; confirm whether %1 is intentional. Changing it requires regenerating the |
| ; expected assembly. |
| %scatter.address = getelementptr float, <4 x float*> %1, i32 5 |
| %wide.masked.gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gather.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) |
| %4 = fadd <4 x float> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %4, <4 x float*> %scatter.address, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; half pointer-IV gather/scatter: eight lanes at element offsets 0, 4, ..., 28, |
| ; with both pointers advancing by 32 elements per iteration. %y arrives as a |
| ; float and is truncated to half before being splatted; the expected assembly |
| ; does that conversion once (vcvtb.f16.f32) ahead of the loop and feeds the |
| ; result to vadd.f16 through r2. |
| define void @ptr_iv_v8f16(half* noalias nocapture readonly %A, half* noalias nocapture %B, float %y) { |
| ; CHECK-LABEL: ptr_iv_v8f16: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: vmov s0, r2 |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: vcvtb.f16.f32 s0, s0 |
| ; CHECK-NEXT: adr r3, .LCPI8_0 |
| ; CHECK-NEXT: vmov.f16 r2, s0 |
| ; CHECK-NEXT: vldrw.u32 q0, [r3] |
| ; CHECK-NEXT: .LBB8_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: vadd.f16 q1, q1, r2 |
| ; CHECK-NEXT: vstrh.16 q1, [r1, q0, uxtw #1] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: le lr, .LBB8_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI8_0: |
| ; CHECK-NEXT: .short 0 @ 0x0 |
| ; CHECK-NEXT: .short 4 @ 0x4 |
| ; CHECK-NEXT: .short 8 @ 0x8 |
| ; CHECK-NEXT: .short 12 @ 0xc |
| ; CHECK-NEXT: .short 16 @ 0x10 |
| ; CHECK-NEXT: .short 20 @ 0x14 |
| ; CHECK-NEXT: .short 24 @ 0x18 |
| ; CHECK-NEXT: .short 28 @ 0x1c |
| vector.ph: |
| ; Narrow %y to half, then splat it across all eight lanes. |
| %y.trunc = fptrunc float %y to half |
| %broadcast.splatinsert = insertelement <8 x half> undef, half %y.trunc, i32 0 |
| %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi half* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi half* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; %0 / %2 are the next-iteration pointers; %1 / %3 are the per-lane addresses. |
| %0 = getelementptr half, half* %pointer.phi, i32 32 |
| %1 = getelementptr half, half* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| %2 = getelementptr half, half* %pointer.phi13, i32 32 |
| %3 = getelementptr half, half* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| %wide.masked.gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %1, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef) |
| %4 = fadd <8 x half> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %4, <8 x half*> %3, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| ; half pointer-IV gather/scatter where a further constant GEP is applied to |
| ; the vector of pointers: +3 elements for the gather, +5 for the scatter. |
| ; %y arrives as a float and is truncated to half before being splatted. |
| ; The expected assembly folds the constants into byte-offset constant pools: |
| ; (0,4,...,28 + 3) * 2 = 6,14,...,62 and (0,4,...,28 + 5) * 2 = 10,18,...,66. |
| define void @ptr_iv_v8f16_mult(half* noalias nocapture readonly %A, half* noalias nocapture %B, float %y) { |
| ; CHECK-LABEL: ptr_iv_v8f16_mult: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r7, lr} |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: vmov s0, r2 |
| ; CHECK-NEXT: adr r2, .LCPI9_0 |
| ; CHECK-NEXT: vcvtb.f16.f32 s0, s0 |
| ; CHECK-NEXT: mov.w lr, #249 |
| ; CHECK-NEXT: vmov.f16 r1, s0 |
| ; CHECK-NEXT: vldrw.u32 q0, [r2] |
| ; CHECK-NEXT: adr r2, .LCPI9_1 |
| ; CHECK-NEXT: vldrw.u32 q1, [r2] |
| ; CHECK-NEXT: .LBB9_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrh.u16 q2, [r0, q0] |
| ; CHECK-NEXT: vadd.f16 q2, q2, r1 |
| ; CHECK-NEXT: vstrh.16 q2, [r0, q1] |
| ; CHECK-NEXT: adds r0, #64 |
| ; CHECK-NEXT: le lr, .LBB9_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: pop {r7, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI9_0: |
| ; CHECK-NEXT: .short 6 @ 0x6 |
| ; CHECK-NEXT: .short 14 @ 0xe |
| ; CHECK-NEXT: .short 22 @ 0x16 |
| ; CHECK-NEXT: .short 30 @ 0x1e |
| ; CHECK-NEXT: .short 38 @ 0x26 |
| ; CHECK-NEXT: .short 46 @ 0x2e |
| ; CHECK-NEXT: .short 54 @ 0x36 |
| ; CHECK-NEXT: .short 62 @ 0x3e |
| ; CHECK-NEXT: .LCPI9_1: |
| ; CHECK-NEXT: .short 10 @ 0xa |
| ; CHECK-NEXT: .short 18 @ 0x12 |
| ; CHECK-NEXT: .short 26 @ 0x1a |
| ; CHECK-NEXT: .short 34 @ 0x22 |
| ; CHECK-NEXT: .short 42 @ 0x2a |
| ; CHECK-NEXT: .short 50 @ 0x32 |
| ; CHECK-NEXT: .short 58 @ 0x3a |
| ; CHECK-NEXT: .short 66 @ 0x42 |
| vector.ph: |
| ; Narrow %y to half, then splat it across all eight lanes. |
| %y.trunc = fptrunc float %y to half |
| %broadcast.splatinsert = insertelement <8 x half> undef, half %y.trunc, i32 0 |
| %broadcast.splat = shufflevector <8 x half> %broadcast.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi half* [ %A, %vector.ph ], [ %0, %vector.body ] |
| %pointer.phi13 = phi half* [ %B, %vector.ph ], [ %2, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| %0 = getelementptr half, half* %pointer.phi, i32 32 |
| %1 = getelementptr half, half* %pointer.phi, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| %gather.address = getelementptr half, <8 x half*> %1, i32 3 |
| %2 = getelementptr half, half* %pointer.phi13, i32 32 |
| %3 = getelementptr half, half* %pointer.phi13, <8 x i16> <i16 0, i16 4, i16 8, i16 12, i16 16, i16 20, i16 24, i16 28> |
| ; NOTE(review): the scatter address is built from %1 (the %A-based pointers), |
| ; not %3, so %3 is unused and the expected store is based on r0, i.e. it writes |
| ; through the readonly %A argument. @ptr_iv_v8i16_mult uses %3 at this point; |
| ; confirm whether %1 is intentional. Changing it requires regenerating the |
| ; expected assembly. |
| %scatter.address = getelementptr half, <8 x half*> %1, i32 5 |
| %wide.masked.gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gather.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef) |
| %4 = fadd <8 x half> %wide.masked.gather, %broadcast.splat |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %4, <8 x half*> %scatter.address, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| ; 249 iterations: %index steps by 4 from 0 until it reaches 996. |
| %index.next = add i32 %index, 4 |
| %5 = icmp eq i32 %index.next, 996 |
| br i1 %5, label %end, label %vector.body |
| |
| end: |
| ret void |
| } |
| |
| |
| ; Three gathers and three scatters sharing one pointer induction variable per |
| ; array (i32 data). The lane pointers sit at element offsets 0, 3, 6, 9 and are |
| ; used at +0, +1 and +2 elements; both base pointers advance by 12 elements per |
| ; iteration. The expected assembly uses one offset vector per displacement |
| ; (0,3,6,9 scaled by uxtw #2, plus byte offsets 4,16,28,40 and 8,20,32,44) and |
| ; bumps each scalar base by 48. The trip count depends on %n, and the expected |
| ; loop is a subs/bne pair rather than the le-based loop of the ptr_iv_* tests. |
| define arm_aapcs_vfpcc void @three_pointer_iv_v4i32(i32* nocapture readonly %x, i32* nocapture %z, i32 %n) { |
| ; CHECK-LABEL: three_pointer_iv_v4i32: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r4, lr} |
| ; CHECK-NEXT: push {r4, lr} |
| ; CHECK-NEXT: .vsave {d8, d9, d10, d11} |
| ; CHECK-NEXT: vpush {d8, d9, d10, d11} |
| ; CHECK-NEXT: adr.w r12, .LCPI10_0 |
| ; CHECK-NEXT: adr.w lr, .LCPI10_1 |
| ; CHECK-NEXT: adr r4, .LCPI10_2 |
| ; CHECK-NEXT: vldrw.u32 q1, [lr] |
| ; CHECK-NEXT: vldrw.u32 q0, [r4] |
| ; CHECK-NEXT: vldrw.u32 q2, [r12] |
| ; CHECK-NEXT: movs r3, #10 |
| ; CHECK-NEXT: .LBB10_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrw.u32 q3, [r0, q0] |
| ; CHECK-NEXT: vldrw.u32 q4, [r0, q1, uxtw #2] |
| ; CHECK-NEXT: vldrw.u32 q5, [r0, q2] |
| ; CHECK-NEXT: subs r2, #4 |
| ; CHECK-NEXT: vmul.i32 q3, q4, q3 |
| ; CHECK-NEXT: add.w r0, r0, #48 |
| ; CHECK-NEXT: vmul.i32 q5, q4, q5 |
| ; CHECK-NEXT: vmul.i32 q4, q4, r3 |
| ; CHECK-NEXT: vstrw.32 q4, [r1, q1, uxtw #2] |
| ; CHECK-NEXT: vstrw.32 q5, [r1, q2] |
| ; CHECK-NEXT: vstrw.32 q3, [r1, q0] |
| ; CHECK-NEXT: add.w r1, r1, #48 |
| ; CHECK-NEXT: bne .LBB10_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: vpop {d8, d9, d10, d11} |
| ; CHECK-NEXT: pop {r4, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI10_0: |
| ; CHECK-NEXT: .long 4 @ 0x4 |
| ; CHECK-NEXT: .long 16 @ 0x10 |
| ; CHECK-NEXT: .long 28 @ 0x1c |
| ; CHECK-NEXT: .long 40 @ 0x28 |
| ; CHECK-NEXT: .LCPI10_1: |
| ; CHECK-NEXT: .long 0 @ 0x0 |
| ; CHECK-NEXT: .long 3 @ 0x3 |
| ; CHECK-NEXT: .long 6 @ 0x6 |
| ; CHECK-NEXT: .long 9 @ 0x9 |
| ; CHECK-NEXT: .LCPI10_2: |
| ; CHECK-NEXT: .long 8 @ 0x8 |
| ; CHECK-NEXT: .long 20 @ 0x14 |
| ; CHECK-NEXT: .long 32 @ 0x20 |
| ; CHECK-NEXT: .long 44 @ 0x2c |
| vector.ph: |
| br label %vector.body |
| |
| vector.body: |
| %pointer.phi = phi i32* [ %x, %vector.ph ], [ %v3, %vector.body ] |
| %pointer.phi55 = phi i32* [ %z, %vector.ph ], [ %v4, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
| ; %vector.gep / %vector.gep56 are the per-lane addresses into %x / %z; |
| ; %v3 / %v4 are the next-iteration base pointers. |
| %vector.gep = getelementptr i32, i32* %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| %v3 = getelementptr i32, i32* %pointer.phi, i32 12 |
| %vector.gep56 = getelementptr i32, i32* %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| %v4 = getelementptr i32, i32* %pointer.phi55, i32 12 |
| ; %v5 has no users in this function. |
| %v5 = add i32 %index, 0 |
| ; Gathers from the lane pointers at +0, +1 (%v6) and +2 (%v7) elements. |
| %v6 = getelementptr inbounds i32, <4 x i32*> %vector.gep, i32 1 |
| %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %vector.gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) |
| %v7 = getelementptr inbounds i32, <4 x i32*> %vector.gep, i32 2 |
| %wide.masked.gather57 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %v6, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) |
| %wide.masked.gather58 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %v7, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef) |
| ; The +0 gather is multiplied by 10, by the +1 gather and by the +2 gather. |
| %v11 = mul nuw nsw <4 x i32> %wide.masked.gather, <i32 10, i32 10, i32 10, i32 10> |
| %v13 = mul nuw nsw <4 x i32> %wide.masked.gather, %wide.masked.gather57 |
| %v15 = mul nuw nsw <4 x i32> %wide.masked.gather, %wide.masked.gather58 |
| ; The three products are scattered to %z at +0, +1 (%v17) and +2 (%v18) elements. |
| %v17 = getelementptr inbounds i32, <4 x i32*> %vector.gep56, i32 1 |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %v11, <4 x i32*> %vector.gep56, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| %v18 = getelementptr inbounds i32, <4 x i32*> %vector.gep56, i32 2 |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %v13, <4 x i32*> %v17, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %v15, <4 x i32*> %v18, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| ; The loop exits when %index, stepping by 4, becomes equal to %n. |
| %index.next = add i32 %index, 4 |
| %v37 = icmp eq i32 %index.next, %n |
| br i1 %v37, label %end, label %vector.body |
| |
| end: |
| ret void; |
| } |
| |
; three_pointer_iv_v4i8: single-block loop driven by two i8 pointer induction
; variables (%x is read from, %z is written to), each advanced by 12 elements
; per iteration. Three <4 x i8> gathers read lanes at element offsets
; {0,3,6,9}, {1,4,7,10} and {2,5,8,11} from the read pointer; the values are
; zero-extended to i32, multiplied, truncated back to i8 and scattered to the
; same three offset sets from the write pointer.
; The expected-assembly lines below are autogenerated (see the NOTE on the
; first line of this file); regenerate them with the script rather than
; editing them by hand.
| define arm_aapcs_vfpcc void @three_pointer_iv_v4i8(i8* nocapture readonly %x, i8* nocapture %z, i32 %n) { |
| ; CHECK-LABEL: three_pointer_iv_v4i8: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r4, lr} |
| ; CHECK-NEXT: push {r4, lr} |
| ; CHECK-NEXT: .vsave {d8, d9, d10, d11} |
| ; CHECK-NEXT: vpush {d8, d9, d10, d11} |
| ; CHECK-NEXT: adr.w r12, .LCPI11_0 |
| ; CHECK-NEXT: adr.w lr, .LCPI11_1 |
| ; CHECK-NEXT: adr r4, .LCPI11_2 |
| ; CHECK-NEXT: vldrw.u32 q1, [lr] |
| ; CHECK-NEXT: vldrw.u32 q0, [r4] |
| ; CHECK-NEXT: vldrw.u32 q2, [r12] |
| ; CHECK-NEXT: movs r3, #10 |
| ; CHECK-NEXT: .LBB11_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrb.u32 q3, [r0, q0] |
| ; CHECK-NEXT: vldrb.u32 q4, [r0, q1] |
| ; CHECK-NEXT: vldrb.u32 q5, [r0, q2] |
| ; CHECK-NEXT: subs r2, #4 |
| ; CHECK-NEXT: vmul.i32 q3, q4, q3 |
| ; CHECK-NEXT: add.w r0, r0, #12 |
| ; CHECK-NEXT: vmul.i32 q5, q4, q5 |
| ; CHECK-NEXT: vmul.i32 q4, q4, r3 |
| ; CHECK-NEXT: vstrb.32 q4, [r1, q1] |
| ; CHECK-NEXT: vstrb.32 q5, [r1, q2] |
| ; CHECK-NEXT: vstrb.32 q3, [r1, q0] |
| ; CHECK-NEXT: add.w r1, r1, #12 |
| ; CHECK-NEXT: bne .LBB11_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: vpop {d8, d9, d10, d11} |
| ; CHECK-NEXT: pop {r4, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI11_0: |
| ; CHECK-NEXT: .long 1 @ 0x1 |
| ; CHECK-NEXT: .long 4 @ 0x4 |
| ; CHECK-NEXT: .long 7 @ 0x7 |
| ; CHECK-NEXT: .long 10 @ 0xa |
| ; CHECK-NEXT: .LCPI11_1: |
| ; CHECK-NEXT: .long 0 @ 0x0 |
| ; CHECK-NEXT: .long 3 @ 0x3 |
| ; CHECK-NEXT: .long 6 @ 0x6 |
| ; CHECK-NEXT: .long 9 @ 0x9 |
| ; CHECK-NEXT: .LCPI11_2: |
| ; CHECK-NEXT: .long 2 @ 0x2 |
| ; CHECK-NEXT: .long 5 @ 0x5 |
| ; CHECK-NEXT: .long 8 @ 0x8 |
| ; CHECK-NEXT: .long 11 @ 0xb |
| vector.ph: |
| br label %vector.body |
| |
| vector.body: ; preds = %vector.body, %vector.ph |
; Loop-carried state: read pointer, write pointer and trip counter.
| %pointer.phi = phi i8* [ %x, %vector.ph ], [ %v3, %vector.body ] |
| %pointer.phi55 = phi i8* [ %z, %vector.ph ], [ %v4, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
; Per-lane addresses at a stride of 3 elements; %v3 / %v4 are the base
; pointers for the next iteration (+12 elements).
| %vector.gep = getelementptr i8, i8* %pointer.phi, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| %v3 = getelementptr i8, i8* %pointer.phi, i32 12 |
| %vector.gep56 = getelementptr i8, i8* %pointer.phi55, <4 x i32> <i32 0, i32 3, i32 6, i32 9> |
| %v4 = getelementptr i8, i8* %pointer.phi55, i32 12 |
; %v5 has no users in this function.
| %v5 = add i32 %index, 0 |
; %v6 and %v7 move every lane of %vector.gep forward by 1 and 2 elements.
; All three gathers use an all-true mask and an undef passthru.
| %v6 = getelementptr inbounds i8, <4 x i8*> %vector.gep, i32 1 |
| %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %vector.gep, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) |
| %v7 = getelementptr inbounds i8, <4 x i8*> %vector.gep, i32 2 |
| %wide.masked.gather57 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %v6, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) |
| %wide.masked.gather58 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %v7, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef) |
; Widen to i32, multiply the first gather by 10, by the second gather and by
; the third gather, then narrow each product back to i8.
| %v8 = zext <4 x i8> %wide.masked.gather to <4 x i32> |
| %v9 = zext <4 x i8> %wide.masked.gather57 to <4 x i32> |
| %v10 = zext <4 x i8> %wide.masked.gather58 to <4 x i32> |
| %v11 = mul nuw nsw <4 x i32> %v8, <i32 10, i32 10, i32 10, i32 10> |
| %v12 = trunc <4 x i32> %v11 to <4 x i8> |
| %v13 = mul nuw nsw <4 x i32> %v8, %v9 |
| %v14 = trunc <4 x i32> %v13 to <4 x i8> |
| %v15 = mul nuw nsw <4 x i32> %v8, %v10 |
| %v16 = trunc <4 x i32> %v15 to <4 x i8> |
; Scatter the three products to lane offsets +0, +1 and +2 of the write
; pointer's address vector.
| %v17 = getelementptr inbounds i8, <4 x i8*> %vector.gep56, i32 1 |
| call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %v12, <4 x i8*> %vector.gep56, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| %v18 = getelementptr inbounds i8, <4 x i8*> %vector.gep56, i32 2 |
| call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %v14, <4 x i8*> %v17, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
| call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %v16, <4 x i8*> %v18, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) |
; Leave the loop once the counter, stepped by 4, equals %n.
| %index.next = add i32 %index, 4 |
| %v37 = icmp eq i32 %index.next, %n |
| br i1 %v37, label %end, label %vector.body |
| |
| end: |
| ret void; |
| } |
| |
; three_pointer_iv_v8i16: single-block loop driven by two i16 pointer
; induction variables (%x is read from, %z is written to), each advanced by
; 24 elements per iteration. Three <8 x i16> gathers read lanes at element
; offsets {0,3,...,21}, {1,4,...,22} and {2,5,...,23} from the read pointer;
; the products are formed directly in i16 and scattered to the same three
; offset sets from the write pointer.
; The expected-assembly lines below are autogenerated (see the NOTE on the
; first line of this file); regenerate them with the script rather than
; editing them by hand.
| define arm_aapcs_vfpcc void @three_pointer_iv_v8i16(i16* nocapture readonly %x, i16* nocapture %z, i32 %n) { |
| ; CHECK-LABEL: three_pointer_iv_v8i16: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r4, lr} |
| ; CHECK-NEXT: push {r4, lr} |
| ; CHECK-NEXT: .vsave {d8, d9, d10, d11} |
| ; CHECK-NEXT: vpush {d8, d9, d10, d11} |
| ; CHECK-NEXT: adr.w r12, .LCPI12_0 |
| ; CHECK-NEXT: adr.w lr, .LCPI12_1 |
| ; CHECK-NEXT: adr r4, .LCPI12_2 |
| ; CHECK-NEXT: vldrw.u32 q1, [lr] |
| ; CHECK-NEXT: vldrw.u32 q0, [r4] |
| ; CHECK-NEXT: vldrw.u32 q2, [r12] |
| ; CHECK-NEXT: movs r3, #10 |
| ; CHECK-NEXT: .LBB12_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrh.u16 q3, [r0, q0] |
| ; CHECK-NEXT: vldrh.u16 q4, [r0, q1, uxtw #1] |
| ; CHECK-NEXT: vldrh.u16 q5, [r0, q2] |
| ; CHECK-NEXT: subs r2, #4 |
| ; CHECK-NEXT: vmul.i16 q3, q4, q3 |
| ; CHECK-NEXT: add.w r0, r0, #48 |
| ; CHECK-NEXT: vmul.i16 q5, q4, q5 |
| ; CHECK-NEXT: vmul.i16 q4, q4, r3 |
| ; CHECK-NEXT: vstrh.16 q4, [r1, q1, uxtw #1] |
| ; CHECK-NEXT: vstrh.16 q5, [r1, q2] |
| ; CHECK-NEXT: vstrh.16 q3, [r1, q0] |
| ; CHECK-NEXT: add.w r1, r1, #48 |
| ; CHECK-NEXT: bne .LBB12_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: vpop {d8, d9, d10, d11} |
| ; CHECK-NEXT: pop {r4, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI12_0: |
| ; CHECK-NEXT: .short 2 @ 0x2 |
| ; CHECK-NEXT: .short 8 @ 0x8 |
| ; CHECK-NEXT: .short 14 @ 0xe |
| ; CHECK-NEXT: .short 20 @ 0x14 |
| ; CHECK-NEXT: .short 26 @ 0x1a |
| ; CHECK-NEXT: .short 32 @ 0x20 |
| ; CHECK-NEXT: .short 38 @ 0x26 |
| ; CHECK-NEXT: .short 44 @ 0x2c |
| ; CHECK-NEXT: .LCPI12_1: |
| ; CHECK-NEXT: .short 0 @ 0x0 |
| ; CHECK-NEXT: .short 3 @ 0x3 |
| ; CHECK-NEXT: .short 6 @ 0x6 |
| ; CHECK-NEXT: .short 9 @ 0x9 |
| ; CHECK-NEXT: .short 12 @ 0xc |
| ; CHECK-NEXT: .short 15 @ 0xf |
| ; CHECK-NEXT: .short 18 @ 0x12 |
| ; CHECK-NEXT: .short 21 @ 0x15 |
| ; CHECK-NEXT: .LCPI12_2: |
| ; CHECK-NEXT: .short 4 @ 0x4 |
| ; CHECK-NEXT: .short 10 @ 0xa |
| ; CHECK-NEXT: .short 16 @ 0x10 |
| ; CHECK-NEXT: .short 22 @ 0x16 |
| ; CHECK-NEXT: .short 28 @ 0x1c |
| ; CHECK-NEXT: .short 34 @ 0x22 |
| ; CHECK-NEXT: .short 40 @ 0x28 |
| ; CHECK-NEXT: .short 46 @ 0x2e |
| vector.ph: |
| br label %vector.body |
| |
| vector.body: |
; Loop-carried state: read pointer, write pointer and trip counter.
| %pointer.phi = phi i16* [ %x, %vector.ph ], [ %v3, %vector.body ] |
| %pointer.phi55 = phi i16* [ %z, %vector.ph ], [ %v4, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
; Per-lane addresses at a stride of 3 elements, using i16 index vectors;
; %v3 / %v4 are the base pointers for the next iteration (+24 elements).
| %vector.gep = getelementptr i16, i16* %pointer.phi, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21> |
| %v3 = getelementptr i16, i16* %pointer.phi, i32 24 |
| %vector.gep56 = getelementptr i16, i16* %pointer.phi55, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21> |
| %v4 = getelementptr i16, i16* %pointer.phi55, i32 24 |
; %v5 has no users in this function.
| %v5 = add i32 %index, 0 |
; %v6 and %v7 move every lane of %vector.gep forward by 1 and 2 elements.
; All three gathers use an all-true mask and an undef passthru.
| %v6 = getelementptr inbounds i16, <8 x i16*> %vector.gep, i16 1 |
| %wide.masked.gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %vector.gep, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef) |
| %v7 = getelementptr inbounds i16, <8 x i16*> %vector.gep, i16 2 |
| %wide.masked.gather57 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %v6, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef) |
| %wide.masked.gather58 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %v7, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef) |
; Multiply the first gather by 10, by the second gather and by the third
; gather; there is no widening in this variant.
| %v11 = mul nuw nsw <8 x i16> %wide.masked.gather, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> |
| %v13 = mul nuw nsw <8 x i16> %wide.masked.gather, %wide.masked.gather57 |
| %v15 = mul nuw nsw <8 x i16> %wide.masked.gather, %wide.masked.gather58 |
; Scatter the three products to lane offsets +0, +1 and +2 of the write
; pointer's address vector (these offsets are written as i32, unlike the
; i16 offsets used on the gather side).
| %v17 = getelementptr inbounds i16, <8 x i16*> %vector.gep56, i32 1 |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v11, <8 x i16*> %vector.gep56, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| %v18 = getelementptr inbounds i16, <8 x i16*> %vector.gep56, i32 2 |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v13, <8 x i16*> %v17, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %v15, <8 x i16*> %v18, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
; Leave the loop once the counter, stepped by 4, equals %n.
; NOTE(review): the step is 4 although each iteration handles 8 lanes; the
; expected `subs r2, #4` above depends on it, so confirm intent (and
; regenerate the expectations) before changing it.
| %index.next = add i32 %index, 4 |
| %v37 = icmp eq i32 %index.next, %n |
| br i1 %v37, label %end, label %vector.body |
| |
| end: |
| ret void; |
| } |
| |
; three_pointer_iv_v16i8: single-block loop driven by two i8 pointer
; induction variables (%x is read from, %z is written to), each advanced by
; 48 elements per iteration. Three <16 x i8> gathers read lanes at element
; offsets {0,3,...,45}, {1,4,...,46} and {2,5,...,47} from the read pointer;
; the products are formed directly in i8 and scattered to the same three
; offset sets from the write pointer.
; The expected-assembly lines below are autogenerated (see the NOTE on the
; first line of this file); regenerate them with the script rather than
; editing them by hand.
| define arm_aapcs_vfpcc void @three_pointer_iv_v16i8(i8* nocapture readonly %x, i8* nocapture %z, i32 %n) { |
| ; CHECK-LABEL: three_pointer_iv_v16i8: |
| ; CHECK: @ %bb.0: @ %vector.ph |
| ; CHECK-NEXT: .save {r4, lr} |
| ; CHECK-NEXT: push {r4, lr} |
| ; CHECK-NEXT: .vsave {d8, d9, d10, d11} |
| ; CHECK-NEXT: vpush {d8, d9, d10, d11} |
| ; CHECK-NEXT: adr.w r12, .LCPI13_0 |
| ; CHECK-NEXT: adr.w lr, .LCPI13_1 |
| ; CHECK-NEXT: adr r4, .LCPI13_2 |
| ; CHECK-NEXT: vldrw.u32 q1, [lr] |
| ; CHECK-NEXT: vldrw.u32 q0, [r4] |
| ; CHECK-NEXT: vldrw.u32 q2, [r12] |
| ; CHECK-NEXT: movs r3, #10 |
| ; CHECK-NEXT: .LBB13_1: @ %vector.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: vldrb.u8 q3, [r0, q0] |
| ; CHECK-NEXT: vldrb.u8 q4, [r0, q1] |
| ; CHECK-NEXT: vldrb.u8 q5, [r0, q2] |
| ; CHECK-NEXT: subs r2, #4 |
| ; CHECK-NEXT: vmul.i8 q3, q4, q3 |
| ; CHECK-NEXT: add.w r0, r0, #48 |
| ; CHECK-NEXT: vmul.i8 q5, q4, q5 |
| ; CHECK-NEXT: vmul.i8 q4, q4, r3 |
| ; CHECK-NEXT: vstrb.8 q4, [r1, q1] |
| ; CHECK-NEXT: vstrb.8 q5, [r1, q2] |
| ; CHECK-NEXT: vstrb.8 q3, [r1, q0] |
| ; CHECK-NEXT: add.w r1, r1, #48 |
| ; CHECK-NEXT: bne .LBB13_1 |
| ; CHECK-NEXT: @ %bb.2: @ %end |
| ; CHECK-NEXT: vpop {d8, d9, d10, d11} |
| ; CHECK-NEXT: pop {r4, pc} |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: .LCPI13_0: |
| ; CHECK-NEXT: .byte 1 @ 0x1 |
| ; CHECK-NEXT: .byte 4 @ 0x4 |
| ; CHECK-NEXT: .byte 7 @ 0x7 |
| ; CHECK-NEXT: .byte 10 @ 0xa |
| ; CHECK-NEXT: .byte 13 @ 0xd |
| ; CHECK-NEXT: .byte 16 @ 0x10 |
| ; CHECK-NEXT: .byte 19 @ 0x13 |
| ; CHECK-NEXT: .byte 22 @ 0x16 |
| ; CHECK-NEXT: .byte 25 @ 0x19 |
| ; CHECK-NEXT: .byte 28 @ 0x1c |
| ; CHECK-NEXT: .byte 31 @ 0x1f |
| ; CHECK-NEXT: .byte 34 @ 0x22 |
| ; CHECK-NEXT: .byte 37 @ 0x25 |
| ; CHECK-NEXT: .byte 40 @ 0x28 |
| ; CHECK-NEXT: .byte 43 @ 0x2b |
| ; CHECK-NEXT: .byte 46 @ 0x2e |
| ; CHECK-NEXT: .LCPI13_1: |
| ; CHECK-NEXT: .byte 0 @ 0x0 |
| ; CHECK-NEXT: .byte 3 @ 0x3 |
| ; CHECK-NEXT: .byte 6 @ 0x6 |
| ; CHECK-NEXT: .byte 9 @ 0x9 |
| ; CHECK-NEXT: .byte 12 @ 0xc |
| ; CHECK-NEXT: .byte 15 @ 0xf |
| ; CHECK-NEXT: .byte 18 @ 0x12 |
| ; CHECK-NEXT: .byte 21 @ 0x15 |
| ; CHECK-NEXT: .byte 24 @ 0x18 |
| ; CHECK-NEXT: .byte 27 @ 0x1b |
| ; CHECK-NEXT: .byte 30 @ 0x1e |
| ; CHECK-NEXT: .byte 33 @ 0x21 |
| ; CHECK-NEXT: .byte 36 @ 0x24 |
| ; CHECK-NEXT: .byte 39 @ 0x27 |
| ; CHECK-NEXT: .byte 42 @ 0x2a |
| ; CHECK-NEXT: .byte 45 @ 0x2d |
| ; CHECK-NEXT: .LCPI13_2: |
| ; CHECK-NEXT: .byte 2 @ 0x2 |
| ; CHECK-NEXT: .byte 5 @ 0x5 |
| ; CHECK-NEXT: .byte 8 @ 0x8 |
| ; CHECK-NEXT: .byte 11 @ 0xb |
| ; CHECK-NEXT: .byte 14 @ 0xe |
| ; CHECK-NEXT: .byte 17 @ 0x11 |
| ; CHECK-NEXT: .byte 20 @ 0x14 |
| ; CHECK-NEXT: .byte 23 @ 0x17 |
| ; CHECK-NEXT: .byte 26 @ 0x1a |
| ; CHECK-NEXT: .byte 29 @ 0x1d |
| ; CHECK-NEXT: .byte 32 @ 0x20 |
| ; CHECK-NEXT: .byte 35 @ 0x23 |
| ; CHECK-NEXT: .byte 38 @ 0x26 |
| ; CHECK-NEXT: .byte 41 @ 0x29 |
| ; CHECK-NEXT: .byte 44 @ 0x2c |
| ; CHECK-NEXT: .byte 47 @ 0x2f |
| vector.ph: |
| br label %vector.body |
| |
| vector.body: |
; Loop-carried state: read pointer, write pointer and trip counter.
| %pointer.phi = phi i8* [ %x, %vector.ph ], [ %v3, %vector.body ] |
| %pointer.phi55 = phi i8* [ %z, %vector.ph ], [ %v4, %vector.body ] |
| %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] |
; Per-lane addresses at a stride of 3 elements, using i8 index vectors;
; %v3 / %v4 are the base pointers for the next iteration (+48 elements).
| %vector.gep = getelementptr i8, i8* %pointer.phi, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> |
| %v3 = getelementptr i8, i8* %pointer.phi, i32 48 |
| %vector.gep56 = getelementptr i8, i8* %pointer.phi55, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> |
| %v4 = getelementptr i8, i8* %pointer.phi55, i32 48 |
; %v5 has no users in this function.
| %v5 = add i32 %index, 0 |
; %v6 and %v7 move every lane of %vector.gep forward by 1 and 2 elements.
; All three gathers use an all-true mask and an undef passthru.
| %v6 = getelementptr inbounds i8, <16 x i8*> %vector.gep, i8 1 |
| %wide.masked.gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %vector.gep, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) |
| %v7 = getelementptr inbounds i8, <16 x i8*> %vector.gep, i8 2 |
| %wide.masked.gather57 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %v6, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) |
| %wide.masked.gather58 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %v7, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) |
; Multiply the first gather by 10, by the second gather and by the third
; gather; there is no widening in this variant.
| %v11 = mul nuw nsw <16 x i8> %wide.masked.gather, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10> |
| %v13 = mul nuw nsw <16 x i8> %wide.masked.gather, %wide.masked.gather57 |
| %v15 = mul nuw nsw <16 x i8> %wide.masked.gather, %wide.masked.gather58 |
; Scatter the three products to lane offsets +0, +1 and +2 of the write
; pointer's address vector (these offsets are written as i32, unlike the
; i8 offsets used on the gather side).
| %v17 = getelementptr inbounds i8, <16 x i8*> %vector.gep56, i32 1 |
| call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v11, <16 x i8*> %vector.gep56, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| %v18 = getelementptr inbounds i8, <16 x i8*> %vector.gep56, i32 2 |
| call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v13, <16 x i8*> %v17, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
| call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %v15, <16 x i8*> %v18, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) |
; Leave the loop once the counter, stepped by 4, equals %n.
; NOTE(review): the step is 4 although each iteration handles 16 lanes; the
; expected `subs r2, #4` above depends on it, so confirm intent (and
; regenerate the expectations) before changing it.
| %index.next = add i32 %index, 4 |
| %v37 = icmp eq i32 %index.next, %n |
| br i1 %v37, label %end, label %vector.body |
| |
| end: |
| ret void; |
| } |
| |
; Masked gather intrinsic declarations, one per vector type. Operands, per
; the LLVM Language Reference: vector of pointers, i32 alignment, <N x i1>
; lane mask, passthru vector; the result is the loaded vector.
; The float and half variants are not called by the tests at the end of this
; file; presumably earlier tests use them - verify before removing.
| declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>) |
| declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) |
| declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) |
| declare <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*>, i32, <8 x i1>, <8 x i16>) |
| declare <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*>, i32, <8 x i1>, <8 x half>) |
| declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>) |
| |
; Matching masked scatter intrinsic declarations. Operands, per the LLVM
; Language Reference: value vector to store, vector of pointers, i32
; alignment, <N x i1> lane mask.
| declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>) |