| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck %s |
| ; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s |
| |
| ; CHECK-LABEL: lCPI0_0: |
| ; CHECK-NEXT: .byte 0 ; 0x0 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 1 ; 0x1 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 2 ; 0x2 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 3 ; 0x3 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT:lCPI0_1: |
| ; CHECK-NEXT: .byte 4 ; 0x4 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 5 ; 0x5 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 6 ; 0x6 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 7 ; 0x7 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT:lCPI0_2: |
| ; CHECK-NEXT: .byte 8 ; 0x8 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 9 ; 0x9 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 10 ; 0xa |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 11 ; 0xb |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT:lCPI0_3: |
| ; CHECK-NEXT: .byte 12 ; 0xc |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 13 ; 0xd |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 14 ; 0xe |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 15 ; 0xf |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| |
| ; CHECK-BE: .LCPI0_0: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 0 // 0x0 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 1 // 0x1 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 2 // 0x2 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 3 // 0x3 |
| ; CHECK-BE-NEXT: .LCPI0_1: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 4 // 0x4 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 5 // 0x5 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 6 // 0x6 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 7 // 0x7 |
| ; CHECK-BE-NEXT: .LCPI0_2: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 8 // 0x8 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 9 // 0x9 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 10 // 0xa |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 11 // 0xb |
| ; CHECK-BE-NEXT: .LCPI0_3: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 12 // 0xc |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 13 // 0xd |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 14 // 0xe |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 15 // 0xf |
| |
| ; It's profitable to convert the zext to a shuffle, which in turn will be |
| ; lowered to 4 tbl instructions. The masks are materialized outside the loop. |
| define void @zext_v16i8_to_v16i32_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh0: |
| ; CHECK-NEXT: adrp x9, lCPI0_0@PAGE |
| ; CHECK-NEXT: Lloh1: |
| ; CHECK-NEXT: adrp x10, lCPI0_1@PAGE |
| ; CHECK-NEXT: Lloh2: |
| ; CHECK-NEXT: adrp x11, lCPI0_2@PAGE |
| ; CHECK-NEXT: Lloh3: |
| ; CHECK-NEXT: adrp x12, lCPI0_3@PAGE |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: Lloh4: |
| ; CHECK-NEXT: ldr q0, [x9, lCPI0_0@PAGEOFF] |
| ; CHECK-NEXT: Lloh5: |
| ; CHECK-NEXT: ldr q1, [x10, lCPI0_1@PAGEOFF] |
| ; CHECK-NEXT: Lloh6: |
| ; CHECK-NEXT: ldr q2, [x11, lCPI0_2@PAGEOFF] |
| ; CHECK-NEXT: Lloh7: |
| ; CHECK-NEXT: ldr q3, [x12, lCPI0_3@PAGEOFF] |
| ; CHECK-NEXT: LBB0_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q4, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: tbl.16b v5, { v4 }, v3 |
| ; CHECK-NEXT: tbl.16b v6, { v4 }, v2 |
| ; CHECK-NEXT: tbl.16b v7, { v4 }, v1 |
| ; CHECK-NEXT: tbl.16b v4, { v4 }, v0 |
| ; CHECK-NEXT: stp q6, q5, [x1, #32] |
| ; CHECK-NEXT: stp q4, q7, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB0_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh6 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh1, Lloh5 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh4 |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI0_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI0_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI0_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI0_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_3 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB0_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v0.16b |
| ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v2.16b |
| ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v1.16b |
| ; CHECK-BE-NEXT: st1 { v5.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: st1 { v6.16b }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v7.16b }, [x10] |
| ; CHECK-BE-NEXT: st1 { v4.16b }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB0_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| ; %iv starts at 0 and advances by 16 until it reaches 128 (8 iterations). |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| ; The zext under test. Both runs expect four tbl instructions in the loop, |
| ; with the four mask constants loaded once in the entry block. |
| %ext = zext <16 x i8> %load to <16 x i32> |
| ; %dst is indexed in i32 units, so it moves 64 bytes per iteration. |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <16 x i32> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; The zext lives in a conditionally executed block (%then) inside the loop |
| ; rather than in the loop header. The assertions below expect the plain |
| ; ushll/ushll2 lowering here, with no tbl and no constant-pool masks. |
| define void @zext_v16i8_to_v16i32_in_loop_not_header(ptr %src, ptr %dst, i1 %c) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_not_header: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: b LBB1_2 |
| ; CHECK-NEXT: LBB1_1: ; %loop.latch |
| ; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1 |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: add x1, x1, #64 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: b.eq LBB1_4 |
| ; CHECK-NEXT: LBB1_2: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: tbz w2, #0, LBB1_1 |
| ; CHECK-NEXT: ; %bb.3: ; %then |
| ; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: ushll2.8h v1, v0, #0 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v2, v1, #0 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: ushll2.4s v3, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: stp q1, q2, [x1, #32] |
| ; CHECK-NEXT: stp q0, q3, [x1] |
| ; CHECK-NEXT: b LBB1_1 |
| ; CHECK-NEXT: LBB1_4: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_not_header: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: b .LBB1_2 |
| ; CHECK-BE-NEXT: .LBB1_1: // %loop.latch |
| ; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: b.eq .LBB1_4 |
| ; CHECK-BE-NEXT: .LBB1_2: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: tbz w2, #0, .LBB1_1 |
| ; CHECK-BE-NEXT: // %bb.3: // %then |
| ; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: add x11, x1, #16 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x10] |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x11] |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x1] |
| ; CHECK-BE-NEXT: b .LBB1_1 |
| ; CHECK-BE-NEXT: .LBB1_4: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| ; Loop header: only holds the induction variable and the branch on %c. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| br i1 %c, label %then, label %loop.latch |
| |
| then: |
| ; Conditionally executed body containing the load, zext and store. |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| %ext = zext <16 x i8> %load to <16 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <16 x i32> %ext, ptr %dst.gep |
| br label %loop.latch |
| |
| loop.latch: |
| ; %iv advances by 16 until it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Not profitable to use shuffle/tbl, as 4 tbls + materializing the masks |
| ; require more instructions than lowering zext directly. |
| define void @zext_v16i8_to_v16i32_no_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i32_no_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: ldr q0, [x0] |
| ; CHECK-NEXT: ushll2.8h v1, v0, #0 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v2, v1, #0 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: ushll2.4s v3, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: stp q1, q2, [x1, #32] |
| ; CHECK-NEXT: stp q0, q3, [x1] |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_no_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] |
| ; CHECK-BE-NEXT: add x8, x1, #48 |
| ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x1, #32 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x1, #16 |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x1] |
| ; CHECK-BE-NEXT: ret |
| entry: |
| ; Straight-line load, zext and store with no loop. Both runs expect the |
| ; six-instruction ushll/ushll2 chain instead of tbl. |
| %load = load <16 x i8>, ptr %src |
| %ext = zext <16 x i8> %load to <16 x i32> |
| store <16 x i32> %ext, ptr %dst |
| ret void |
| } |
| |
| ; Avoid using tbl when optimizing for size. |
| define void @zext_v16i8_to_v16i32_in_loop_optsize(ptr %src, ptr %dst) optsize { |
| ; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_optsize: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB3_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll2.8h v1, v0, #0 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v2, v1, #0 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: ushll2.4s v3, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: stp q1, q2, [x1, #32] |
| ; CHECK-NEXT: stp q0, q3, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB3_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_optsize: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB3_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x10] |
| ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB3_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| ; Same loop body as zext_v16i8_to_v16i32_in_loop; only the optsize |
| ; function attribute differs. The expected output uses ushll/ushll2 and |
| ; loads no mask constants. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| %ext = zext <16 x i8> %load to <16 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <16 x i32> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Avoid using tbl when minimizing code size (minsize). |
| define void @zext_v16i8_to_v16i32_in_loop_minsize(ptr %src, ptr %dst) minsize { |
| ; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_minsize: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB4_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll2.8h v1, v0, #0 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v2, v1, #0 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: ushll2.4s v3, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: stp q1, q2, [x1, #32] |
| ; CHECK-NEXT: stp q0, q3, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB4_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_minsize: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB4_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x10] |
| ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB4_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| ; Same loop body as zext_v16i8_to_v16i32_in_loop; only the minsize |
| ; function attribute differs. The expected output uses ushll/ushll2 and |
| ; loads no mask constants. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| %ext = zext <16 x i8> %load to <16 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <16 x i32> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Widening <16 x i8> to <16 x i16> inside a loop. The assertions below |
| ; expect a single ushll/ushll2 pair per iteration and no tbl. |
| define void @zext_v16i8_to_v16i16_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i16_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB5_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll2.8h v1, v0, #0 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: stp q0, q1, [x1], #32 |
| ; CHECK-NEXT: b.ne LBB5_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i16_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB5_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: ushll v1.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: st1 { v1.8h }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #32 |
| ; CHECK-BE-NEXT: st1 { v0.8h }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB5_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| ; %iv starts at 0 and advances by 16 until it reaches 128 (8 iterations). |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| %ext = zext <16 x i8> %load to <16 x i16> |
| ; %dst is indexed in i16 units, so it moves 32 bytes per iteration. |
| %dst.gep = getelementptr i16, ptr %dst, i64 %iv |
| store <16 x i16> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; CHECK-LABEL: lCPI6_0: |
| ; CHECK-NEXT: .byte 0 ; 0x0 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 1 ; 0x1 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 2 ; 0x2 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 3 ; 0x3 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI6_1: |
| ; CHECK-NEXT: .byte 4 ; 0x4 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 5 ; 0x5 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 6 ; 0x6 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 7 ; 0x7 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| |
| ; CHECK-BE: .LCPI6_0: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 0 // 0x0 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 1 // 0x1 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 2 // 0x2 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 3 // 0x3 |
| ; CHECK-BE-NEXT: .LCPI6_1: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 4 // 0x4 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 5 // 0x5 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 6 // 0x6 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 7 // 0x7 |
| |
| ; Widening <8 x i8> to <8 x i32> inside a loop. The assertions below expect |
| ; two tbl instructions per iteration, with the two mask constants |
| ; (CPI6_0 and CPI6_1) loaded once in the entry block. |
| define void @zext_v8i8_to_v8i32_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i32_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh8: |
| ; CHECK-NEXT: adrp x9, lCPI6_0@PAGE |
| ; CHECK-NEXT: Lloh9: |
| ; CHECK-NEXT: adrp x10, lCPI6_1@PAGE |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: Lloh10: |
| ; CHECK-NEXT: ldr q0, [x9, lCPI6_0@PAGEOFF] |
| ; CHECK-NEXT: Lloh11: |
| ; CHECK-NEXT: ldr q1, [x10, lCPI6_1@PAGEOFF] |
| ; CHECK-NEXT: LBB6_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d2, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: tbl.16b v3, { v2 }, v1 |
| ; CHECK-NEXT: tbl.16b v2, { v2 }, v0 |
| ; CHECK-NEXT: stp q2, q3, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB6_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh9, Lloh11 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh10 |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i32_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI6_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI6_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB6_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v2.8b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: tbl v3.16b, { v2.16b }, v0.16b |
| ; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v1.16b |
| ; CHECK-BE-NEXT: st1 { v3.16b }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v2.16b }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB6_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| ; NOTE(review): %iv advances by 16 although only 8 elements are loaded and |
| ; stored per iteration, so half of each 16-element stride is untouched. |
| ; Presumably intentional for this codegen test; confirm before changing. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %ext = zext <8 x i8> %load to <8 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <8 x i32> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Widening <16 x i8> to <16 x i64> inside a loop. The assertions below |
| ; expect a three-level ushll/ushll2 chain (8h, 4s, 2d) and no tbl. |
| define void @zext_v16i8_to_v16i64_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i64_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB7_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll.8h v1, v0, #0 |
| ; CHECK-NEXT: ushll2.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v2, v1, #0 |
| ; CHECK-NEXT: ushll2.4s v3, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: ushll2.2d v4, v3, #0 |
| ; CHECK-NEXT: ushll2.2d v5, v0, #0 |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: ushll.2d v3, v3, #0 |
| ; CHECK-NEXT: stp q0, q5, [x1, #64] |
| ; CHECK-NEXT: ushll.4s v0, v1, #0 |
| ; CHECK-NEXT: stp q3, q4, [x1, #96] |
| ; CHECK-NEXT: ushll2.2d v3, v2, #0 |
| ; CHECK-NEXT: ushll.2d v2, v2, #0 |
| ; CHECK-NEXT: ushll2.2d v1, v0, #0 |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: stp q2, q3, [x1, #32] |
| ; CHECK-NEXT: stp q0, q1, [x1], #128 |
| ; CHECK-NEXT: b.ne LBB7_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB7_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #96 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #112 |
| ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v2.4s, #0 |
| ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #80 |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x10] |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: add x10, x1, #48 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #64 |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x9] |
| ; CHECK-BE-NEXT: ushll v1.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #128 |
| ; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x9] |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB7_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| ; %iv starts at 0 and advances by 16 until it reaches 128 (8 iterations). |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| %ext = zext <16 x i8> %load to <16 x i64> |
| ; %dst is indexed in i64 units, so it moves 128 bytes per iteration. |
| %dst.gep = getelementptr i64, ptr %dst, i64 %iv |
| store <16 x i64> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Widening <8 x i8> to <8 x i64> inside a loop. The assertions below expect |
| ; a ushll/ushll2 chain (8h, 4s, 2d) and no tbl. |
| define void @zext_v8i8_to_v8i64_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i64_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB8_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v1, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: ushll2.2d v2, v1, #0 |
| ; CHECK-NEXT: ushll.2d v1, v1, #0 |
| ; CHECK-NEXT: ushll2.2d v3, v0, #0 |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: stp q1, q2, [x1, #32] |
| ; CHECK-NEXT: stp q0, q3, [x1], #128 |
| ; CHECK-NEXT: b.ne LBB8_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i64_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB8_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: ushll v2.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x10] |
| ; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #128 |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB8_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| ; NOTE(review): %iv advances by 16 although only 8 elements are loaded and |
| ; stored per iteration, so half of each 16-element stride is untouched. |
| ; Presumably intentional for this codegen test; confirm before changing. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %ext = zext <8 x i8> %load to <8 x i64> |
| %dst.gep = getelementptr i64, ptr %dst, i64 %iv |
| store <8 x i64> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Widening <8 x i8> to <8 x i16> inside a loop. The assertions below expect |
| ; a single ushll per iteration and no tbl. |
| define void @zext_v8i8_to_v8i16_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i16_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB9_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: str q0, [x1], #32 |
| ; CHECK-NEXT: b.ne LBB9_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i16_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB9_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: st1 { v0.8h }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #32 |
| ; CHECK-BE-NEXT: b.ne .LBB9_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| ; NOTE(review): %iv advances by 16 although only 8 elements are loaded and |
| ; stored per iteration, so half of each 16-element stride is untouched. |
| ; Presumably intentional for this codegen test; confirm before changing. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %ext = zext <8 x i8> %load to <8 x i16> |
| %dst.gep = getelementptr i16, ptr %dst, i64 %iv |
| store <8 x i16> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that zero-extends <8 x i8> to <8 x i20>, i.e. to a non-power-of-two |
| ; element width. The expected output below widens with ushll/ushll2, moves the |
| ; lanes into general-purpose registers, packs them with shifted orr (plus extr |
| ; on aarch64_be) and writes the result with str/stur/strh scalar stores. |
| define void @zext_v8i8_to_v8i20_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i20_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB10_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v1, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: mov.s w11, v1[1] |
| ; CHECK-NEXT: mov.s w13, v0[1] |
| ; CHECK-NEXT: fmov w12, s1 |
| ; CHECK-NEXT: mov.s w14, v1[2] |
| ; CHECK-NEXT: fmov w15, s0 |
| ; CHECK-NEXT: mov.s w16, v0[2] |
| ; CHECK-NEXT: mov.s w9, v1[3] |
| ; CHECK-NEXT: mov.s w10, v0[3] |
| ; CHECK-NEXT: orr x11, x12, x11, lsl #20 |
| ; CHECK-NEXT: orr x12, x15, x13, lsl #20 |
| ; CHECK-NEXT: orr x11, x11, x14, lsl #40 |
| ; CHECK-NEXT: orr x12, x12, x16, lsl #40 |
| ; CHECK-NEXT: lsr w13, w9, #4 |
| ; CHECK-NEXT: lsr w14, w10, #4 |
| ; CHECK-NEXT: orr x9, x11, x9, lsl #60 |
| ; CHECK-NEXT: orr x10, x12, x10, lsl #60 |
| ; CHECK-NEXT: strh w13, [x1, #18] |
| ; CHECK-NEXT: strh w14, [x1, #8] |
| ; CHECK-NEXT: stur x9, [x1, #10] |
| ; CHECK-NEXT: str x10, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB10_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i20_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB10_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: mov w9, v1.s[1] |
| ; CHECK-BE-NEXT: mov w11, v0.s[1] |
| ; CHECK-BE-NEXT: mov w13, v1.s[2] |
| ; CHECK-BE-NEXT: fmov w14, s1 |
| ; CHECK-BE-NEXT: mov w15, v0.s[2] |
| ; CHECK-BE-NEXT: fmov w16, s0 |
| ; CHECK-BE-NEXT: mov w10, v1.s[3] |
| ; CHECK-BE-NEXT: lsl x9, x9, #40 |
| ; CHECK-BE-NEXT: mov w12, v0.s[3] |
| ; CHECK-BE-NEXT: lsl x11, x11, #40 |
| ; CHECK-BE-NEXT: orr x9, x9, x14, lsl #60 |
| ; CHECK-BE-NEXT: orr x11, x11, x16, lsl #60 |
| ; CHECK-BE-NEXT: orr x9, x9, x13, lsl #20 |
| ; CHECK-BE-NEXT: orr x11, x11, x15, lsl #20 |
| ; CHECK-BE-NEXT: lsr w13, w14, #4 |
| ; CHECK-BE-NEXT: lsr w14, w16, #4 |
| ; CHECK-BE-NEXT: strh w10, [x1, #18] |
| ; CHECK-BE-NEXT: extr x9, x13, x9, #16 |
| ; CHECK-BE-NEXT: strh w12, [x1, #8] |
| ; CHECK-BE-NEXT: extr x10, x14, x11, #16 |
| ; CHECK-BE-NEXT: stur x9, [x1, #10] |
| ; CHECK-BE-NEXT: str x10, [x1], #64 |
| ; CHECK-BE-NEXT: b.ne .LBB10_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %ext = zext <8 x i8> %load to <8 x i20> |
| ; %dst is indexed in i20 units; the expected output advances the destination |
| ; pointer by 64 bytes per iteration for the %iv step of 16. |
| %dst.gep = getelementptr i20, ptr %dst, i64 %iv |
| store <8 x i20> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that zero-extends <4 x i8> to <4 x i32>. |
| ; For arm64-apple-ios the expected output is two ushll steps per iteration. |
| ; For aarch64_be a byte mask is loaded from .LCPI11_0 ahead of the loop and |
| ; each iteration uses rev32 followed by a tbl lookup. |
| define void @zext_v4i8_to_v4i32_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v4i8_to_v4i32_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB11_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr s0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: str q0, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB11_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v4i8_to_v4i32_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI11_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB11_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: ldr s1, [x0, x8] |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: rev32 v1.16b, v1.16b |
| ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b }, v0.16b |
| ; CHECK-BE-NEXT: st1 { v1.16b }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: b.ne .LBB11_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| ; %iv starts at 0 and is used to index %src in i8 units and %dst in i32 units. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <4 x i8>, ptr %src.gep |
| %ext = zext <4 x i8> %load to <4 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <4 x i32> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; CHECK-LABEL: lCPI12_0: |
| ; CHECK-NEXT: .byte 0 ; 0x0 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 1 ; 0x1 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 2 ; 0x2 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 3 ; 0x3 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI12_1: |
| ; CHECK-NEXT: .byte 4 ; 0x4 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 5 ; 0x5 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 6 ; 0x6 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 7 ; 0x7 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI12_2: |
| ; CHECK-NEXT: .byte 8 ; 0x8 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 9 ; 0x9 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 10 ; 0xa |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 11 ; 0xb |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| |
| ; CHECK-BE-LABEL: .LCPI12_0: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 0 // 0x0 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 1 // 0x1 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 2 // 0x2 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 3 // 0x3 |
| ; CHECK-BE-NEXT: .LCPI12_1: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 4 // 0x4 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 5 // 0x5 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 6 // 0x6 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 7 // 0x7 |
| ; CHECK-BE-NEXT: .LCPI12_2: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 8 // 0x8 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 9 // 0x9 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 10 // 0xa |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 11 // 0xb |
| |
| ; Loop that zero-extends <12 x i8> to <12 x i32>. |
| ; For both configurations the expected output loads three byte masks from the |
| ; constant-pool entries 12_0, 12_1 and 12_2 ahead of the loop and performs |
| ; three tbl lookups per iteration, one per 16-byte chunk of the result. |
| define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v12i8_to_v12i32_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh12: |
| ; CHECK-NEXT: adrp x9, lCPI12_0@PAGE |
| ; CHECK-NEXT: Lloh13: |
| ; CHECK-NEXT: adrp x10, lCPI12_1@PAGE |
| ; CHECK-NEXT: Lloh14: |
| ; CHECK-NEXT: adrp x11, lCPI12_2@PAGE |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: Lloh15: |
| ; CHECK-NEXT: ldr q0, [x9, lCPI12_0@PAGEOFF] |
| ; CHECK-NEXT: Lloh16: |
| ; CHECK-NEXT: ldr q1, [x10, lCPI12_1@PAGEOFF] |
| ; CHECK-NEXT: Lloh17: |
| ; CHECK-NEXT: ldr q2, [x11, lCPI12_2@PAGEOFF] |
| ; CHECK-NEXT: LBB12_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q3, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: tbl.16b v4, { v3 }, v2 |
| ; CHECK-NEXT: tbl.16b v5, { v3 }, v1 |
| ; CHECK-NEXT: tbl.16b v3, { v3 }, v0 |
| ; CHECK-NEXT: stp q5, q4, [x1, #16] |
| ; CHECK-NEXT: str q3, [x1], #64 |
| ; CHECK-NEXT: b.ne LBB12_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh16 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh15 |
| ; |
| ; CHECK-BE-LABEL: zext_v12i8_to_v12i32_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI12_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI12_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI12_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI12_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI12_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI12_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB12_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #16 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #32 |
| ; CHECK-BE-NEXT: tbl v4.16b, { v3.16b }, v0.16b |
| ; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v2.16b |
| ; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v1.16b |
| ; CHECK-BE-NEXT: st1 { v4.16b }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v5.16b }, [x9] |
| ; CHECK-BE-NEXT: st1 { v3.16b }, [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB12_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| ; %iv starts at 0 and is used to index %src in i8 units and %dst in i32 units. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <12 x i8>, ptr %src.gep |
| %ext = zext <12 x i8> %load to <12 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <12 x i32> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that zero-extends <16 x i4> (sub-byte elements) to <16 x i32>. |
| ; The expected output loads the 16 nibbles as one 64-bit scalar, extracts each |
| ; 4-bit field with and/ubfx/lsr, inserts the fields into byte lanes, and then |
| ; widens with zip/ushll and masks every lane with the constant 15. |
| ; The aarch64_be output extracts the fields starting from bit 60 downwards and |
| ; adds rev16 steps before widening. |
| define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i4_to_v16i32_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: movi.4s v0, #15 |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB13_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr x9, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: and w10, w9, #0xf |
| ; CHECK-NEXT: ubfx w11, w9, #4, #4 |
| ; CHECK-NEXT: fmov s1, w10 |
| ; CHECK-NEXT: ubfx w10, w9, #8, #4 |
| ; CHECK-NEXT: mov.b v1[1], w11 |
| ; CHECK-NEXT: mov.b v1[2], w10 |
| ; CHECK-NEXT: ubfx w10, w9, #12, #4 |
| ; CHECK-NEXT: mov.b v1[3], w10 |
| ; CHECK-NEXT: ubfx w10, w9, #16, #4 |
| ; CHECK-NEXT: mov.b v1[4], w10 |
| ; CHECK-NEXT: ubfx w10, w9, #20, #4 |
| ; CHECK-NEXT: mov.b v1[5], w10 |
| ; CHECK-NEXT: ubfx w10, w9, #24, #4 |
| ; CHECK-NEXT: mov.b v1[6], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #28, #4 |
| ; CHECK-NEXT: mov.b v1[7], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #32, #4 |
| ; CHECK-NEXT: mov.b v1[8], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #36, #4 |
| ; CHECK-NEXT: mov.b v1[9], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #40, #4 |
| ; CHECK-NEXT: mov.b v1[10], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #44, #4 |
| ; CHECK-NEXT: mov.b v1[11], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #48, #4 |
| ; CHECK-NEXT: mov.b v1[12], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #52, #4 |
| ; CHECK-NEXT: mov.b v1[13], w10 |
| ; CHECK-NEXT: ubfx x10, x9, #56, #4 |
| ; CHECK-NEXT: lsr x9, x9, #60 |
| ; CHECK-NEXT: mov.b v1[14], w10 |
| ; CHECK-NEXT: mov.b v1[15], w9 |
| ; CHECK-NEXT: ext.16b v2, v1, v1, #8 |
| ; CHECK-NEXT: zip2.8b v3, v1, v0 |
| ; CHECK-NEXT: zip1.8b v1, v1, v0 |
| ; CHECK-NEXT: zip1.8b v4, v2, v0 |
| ; CHECK-NEXT: zip2.8b v2, v2, v0 |
| ; CHECK-NEXT: ushll.4s v3, v3, #0 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: and.16b v3, v3, v0 |
| ; CHECK-NEXT: and.16b v1, v1, v0 |
| ; CHECK-NEXT: stp q1, q3, [x1] |
| ; CHECK-NEXT: ushll.4s v1, v2, #0 |
| ; CHECK-NEXT: ushll.4s v2, v4, #0 |
| ; CHECK-NEXT: and.16b v1, v1, v0 |
| ; CHECK-NEXT: and.16b v2, v2, v0 |
| ; CHECK-NEXT: stp q2, q1, [x1, #32] |
| ; CHECK-NEXT: add x1, x1, #64 |
| ; CHECK-NEXT: b.ne LBB13_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i4_to_v16i32_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: movi v0.4s, #15 |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB13_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: ldr x9, [x0, x8] |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: lsr x10, x9, #60 |
| ; CHECK-BE-NEXT: ubfx x11, x9, #56, #4 |
| ; CHECK-BE-NEXT: fmov s1, w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #52, #4 |
| ; CHECK-BE-NEXT: mov v1.b[1], w11 |
| ; CHECK-BE-NEXT: add x11, x1, #32 |
| ; CHECK-BE-NEXT: mov v1.b[2], w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #48, #4 |
| ; CHECK-BE-NEXT: mov v1.b[3], w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #44, #4 |
| ; CHECK-BE-NEXT: mov v1.b[4], w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #40, #4 |
| ; CHECK-BE-NEXT: mov v1.b[5], w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #36, #4 |
| ; CHECK-BE-NEXT: mov v1.b[6], w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #32, #4 |
| ; CHECK-BE-NEXT: mov v1.b[7], w10 |
| ; CHECK-BE-NEXT: ubfx x10, x9, #28, #4 |
| ; CHECK-BE-NEXT: mov v1.b[8], w10 |
| ; CHECK-BE-NEXT: ubfx w10, w9, #24, #4 |
| ; CHECK-BE-NEXT: mov v1.b[9], w10 |
| ; CHECK-BE-NEXT: ubfx w10, w9, #20, #4 |
| ; CHECK-BE-NEXT: mov v1.b[10], w10 |
| ; CHECK-BE-NEXT: ubfx w10, w9, #16, #4 |
| ; CHECK-BE-NEXT: mov v1.b[11], w10 |
| ; CHECK-BE-NEXT: ubfx w10, w9, #12, #4 |
| ; CHECK-BE-NEXT: mov v1.b[12], w10 |
| ; CHECK-BE-NEXT: ubfx w10, w9, #8, #4 |
| ; CHECK-BE-NEXT: mov v1.b[13], w10 |
| ; CHECK-BE-NEXT: ubfx w10, w9, #4, #4 |
| ; CHECK-BE-NEXT: and w9, w9, #0xf |
| ; CHECK-BE-NEXT: mov v1.b[14], w10 |
| ; CHECK-BE-NEXT: add x10, x1, #48 |
| ; CHECK-BE-NEXT: mov v1.b[15], w9 |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: zip2 v3.8b, v1.8b, v0.8b |
| ; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b |
| ; CHECK-BE-NEXT: zip1 v4.8b, v2.8b, v0.8b |
| ; CHECK-BE-NEXT: zip2 v2.8b, v2.8b, v0.8b |
| ; CHECK-BE-NEXT: rev16 v1.8b, v1.8b |
| ; CHECK-BE-NEXT: rev16 v3.8b, v3.8b |
| ; CHECK-BE-NEXT: rev16 v4.8b, v4.8b |
| ; CHECK-BE-NEXT: rev16 v2.8b, v2.8b |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 |
| ; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: ushll v1.4s, v2.4h, #0 |
| ; CHECK-BE-NEXT: ushll v2.4s, v4.4h, #0 |
| ; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b |
| ; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x9] |
| ; CHECK-BE-NEXT: and v2.16b, v2.16b, v0.16b |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x10] |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x11] |
| ; CHECK-BE-NEXT: b.ne .LBB13_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; %src is indexed in i4 units here; the expected output advances the source |
| ; offset by 16 bytes per iteration for the %iv step of 16. |
| %src.gep = getelementptr i4, ptr %src, i64 %iv |
| %load = load <16 x i4>, ptr %src.gep |
| %ext = zext <16 x i4> %load to <16 x i32> |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <16 x i32> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that zero-extends <16 x i16> to <16 x i64>. |
| ; The expected output for both configurations widens in two stages with |
| ; ushll/ushll2 (16-bit to 32-bit lanes, then 32-bit to 64-bit lanes) and does |
| ; not use tbl lookups. |
| define void @zext_v16i16_to_v16i64_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i16_to_v16i64_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB14_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x9, x0, x8 |
| ; CHECK-NEXT: add x8, x8, #32 |
| ; CHECK-NEXT: cmp x8, #256 |
| ; CHECK-NEXT: ldp q0, q1, [x9] |
| ; CHECK-NEXT: ushll.4s v2, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v0, v0, #0 |
| ; CHECK-NEXT: ushll.4s v3, v1, #0 |
| ; CHECK-NEXT: ushll2.4s v1, v1, #0 |
| ; CHECK-NEXT: ushll2.2d v5, v0, #0 |
| ; CHECK-NEXT: ushll2.2d v4, v1, #0 |
| ; CHECK-NEXT: ushll.2d v1, v1, #0 |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: stp q1, q4, [x1, #96] |
| ; CHECK-NEXT: ushll2.2d v1, v3, #0 |
| ; CHECK-NEXT: stp q0, q5, [x1, #32] |
| ; CHECK-NEXT: ushll.2d v3, v3, #0 |
| ; CHECK-NEXT: ushll2.2d v0, v2, #0 |
| ; CHECK-NEXT: stp q3, q1, [x1, #64] |
| ; CHECK-NEXT: ushll.2d v1, v2, #0 |
| ; CHECK-NEXT: stp q1, q0, [x1], #128 |
| ; CHECK-NEXT: b.ne LBB14_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i16_to_v16i64_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB14_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #48 |
| ; CHECK-BE-NEXT: add x8, x8, #32 |
| ; CHECK-BE-NEXT: cmp x8, #256 |
| ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9] |
| ; CHECK-BE-NEXT: add x9, x9, #16 |
| ; CHECK-BE-NEXT: ld1 { v2.8h }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #32 |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #112 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v2.8h, #0 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #96 |
| ; CHECK-BE-NEXT: ushll2 v4.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x10] |
| ; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0 |
| ; CHECK-BE-NEXT: add x10, x1, #80 |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x1] |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #64 |
| ; CHECK-BE-NEXT: add x1, x1, #128 |
| ; CHECK-BE-NEXT: ushll v3.2d, v2.2s, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.2d, v2.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB14_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; %src is indexed in i16 units, so the expected output scales the counter: it |
| ; steps the byte offset by 32 and compares against 256. |
| %src.gep = getelementptr i16, ptr %src, i64 %iv |
| %load = load <16 x i16>, ptr %src.gep |
| %ext = zext <16 x i16> %load to <16 x i64> |
| %dst.gep = getelementptr i64, ptr %dst, i64 %iv |
| store <16 x i64> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that zero-extends <16 x i32> to <16 x i64>. |
| ; The expected output for both configurations widens each 128-bit input |
| ; register with one ushll and one ushll2 to 64-bit lanes; no tbl lookups are |
| ; expected. |
| define void @zext_v16i32_to_v16i64_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i32_to_v16i64_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB15_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x9, x0, x8 |
| ; CHECK-NEXT: add x8, x8, #64 |
| ; CHECK-NEXT: cmp x8, #512 |
| ; CHECK-NEXT: ldp q1, q0, [x9, #32] |
| ; CHECK-NEXT: ushll2.2d v5, v1, #0 |
| ; CHECK-NEXT: ushll.2d v1, v1, #0 |
| ; CHECK-NEXT: ldp q3, q2, [x9] |
| ; CHECK-NEXT: ushll2.2d v4, v0, #0 |
| ; CHECK-NEXT: stp q1, q5, [x1, #64] |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: stp q0, q4, [x1, #96] |
| ; CHECK-NEXT: ushll2.2d v1, v3, #0 |
| ; CHECK-NEXT: ushll2.2d v0, v2, #0 |
| ; CHECK-NEXT: ushll.2d v2, v2, #0 |
| ; CHECK-NEXT: stp q2, q0, [x1, #32] |
| ; CHECK-NEXT: ushll.2d v0, v3, #0 |
| ; CHECK-NEXT: stp q0, q1, [x1], #128 |
| ; CHECK-NEXT: b.ne LBB15_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i32_to_v16i64_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB15_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #64 |
| ; CHECK-BE-NEXT: add x10, x9, #48 |
| ; CHECK-BE-NEXT: add x11, x9, #32 |
| ; CHECK-BE-NEXT: cmp x8, #512 |
| ; CHECK-BE-NEXT: ld1 { v0.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x9, #16 |
| ; CHECK-BE-NEXT: ld1 { v1.4s }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #16 |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x11] |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: ld1 { v4.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #112 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #80 |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: ushll2 v5.2d, v2.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #96 |
| ; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: ushll v3.2d, v4.2s, #0 |
| ; CHECK-BE-NEXT: ushll2 v4.2d, v4.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x1] |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: add x1, x1, #128 |
| ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x10] |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB15_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; %src is indexed in i32 units, so the expected output scales the counter: it |
| ; steps the byte offset by 64 and compares against 512. |
| %src.gep = getelementptr i32, ptr %src, i64 %iv |
| %load = load <16 x i32>, ptr %src.gep |
| %ext = zext <16 x i32> %load to <16 x i64> |
| %dst.gep = getelementptr i64, ptr %dst, i64 %iv |
| store <16 x i64> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that zero-extends <8 x i8> to <8 x i128>, an element type wider than |
| ; any vector lane. The expected output widens to 64-bit lanes with a chain of |
| ; ushll/ushll2 and stores each lane into one half of its 16-byte slot, while |
| ; the other half is written with xzr. On arm64-apple-ios the zero half is at |
| ; offsets 8, 24, ...; on aarch64_be it is at offsets 0, 16, .... |
| define void @zext_v8i8_to_v8i128_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i128_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB16_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d0, [x0, x8] |
| ; CHECK-NEXT: add x9, x1, #112 |
| ; CHECK-NEXT: add x10, x1, #80 |
| ; CHECK-NEXT: str xzr, [x1, #120] |
| ; CHECK-NEXT: str xzr, [x1, #104] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: str xzr, [x1, #88] |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: str xzr, [x1, #72] |
| ; CHECK-NEXT: str xzr, [x1, #56] |
| ; CHECK-NEXT: ushll2.4s v1, v0, #0 |
| ; CHECK-NEXT: str xzr, [x1, #40] |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: str xzr, [x1, #24] |
| ; CHECK-NEXT: ushll2.2d v2, v1, #0 |
| ; CHECK-NEXT: str xzr, [x1, #8] |
| ; CHECK-NEXT: ushll.2d v1, v1, #0 |
| ; CHECK-NEXT: st1.d { v2 }[1], [x9] |
| ; CHECK-NEXT: add x9, x1, #48 |
| ; CHECK-NEXT: str d2, [x1, #96] |
| ; CHECK-NEXT: ushll2.2d v2, v0, #0 |
| ; CHECK-NEXT: st1.d { v1 }[1], [x10] |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: str d1, [x1, #64] |
| ; CHECK-NEXT: str d2, [x1, #32] |
| ; CHECK-NEXT: add x10, x1, #16 |
| ; CHECK-NEXT: str d0, [x1] |
| ; CHECK-NEXT: add x1, x1, #256 |
| ; CHECK-NEXT: st1.d { v2 }[1], [x9] |
| ; CHECK-NEXT: st1.d { v0 }[1], [x10] |
| ; CHECK-NEXT: b.ne LBB16_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i128_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB16_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x10, x1, #88 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #120 |
| ; CHECK-BE-NEXT: str xzr, [x1, #112] |
| ; CHECK-BE-NEXT: str xzr, [x1, #96] |
| ; CHECK-BE-NEXT: str xzr, [x1, #80] |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: str xzr, [x1, #64] |
| ; CHECK-BE-NEXT: str xzr, [x1, #48] |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: str xzr, [x1, #32] |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: str xzr, [x1, #16] |
| ; CHECK-BE-NEXT: ushll2 v2.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: str xzr, [x1] |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.d }[1], [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #56 |
| ; CHECK-BE-NEXT: str d2, [x1, #104] |
| ; CHECK-BE-NEXT: ushll2 v2.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v1.d }[1], [x10] |
| ; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: str d1, [x1, #72] |
| ; CHECK-BE-NEXT: str d2, [x1, #40] |
| ; CHECK-BE-NEXT: add x10, x1, #24 |
| ; CHECK-BE-NEXT: str d0, [x1, #8] |
| ; CHECK-BE-NEXT: add x1, x1, #256 |
| ; CHECK-BE-NEXT: st1 { v2.d }[1], [x9] |
| ; CHECK-BE-NEXT: st1 { v0.d }[1], [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB16_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %ext = zext <8 x i8> %load to <8 x i128> |
| ; %dst is indexed in i128 units; the expected output advances the destination |
| ; pointer by 256 bytes per iteration for the %iv step of 16. |
| %dst.gep = getelementptr i128, ptr %dst, i64 %iv |
| store <8 x i128> %ext, ptr %dst.gep |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Two back-to-back zero-extensions of the same vector type (<8 x i8> to |
| ; <8 x i64>) whose results are each added to a vector loaded from %dst and |
| ; stored back. The expected output loads two byte masks from the constant-pool |
| ; entries 17_0 and 17_1 ahead of the loop and feeds the tbl results into |
| ; widening adds (uaddw/uaddw2); the aarch64_be output additionally uses |
| ; rev32/ext on the tbl results before the uaddw instructions. |
| define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i64_with_add_in_sequence_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh18: |
| ; CHECK-NEXT: adrp x9, lCPI17_0@PAGE |
| ; CHECK-NEXT: Lloh19: |
| ; CHECK-NEXT: adrp x10, lCPI17_1@PAGE |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: Lloh20: |
| ; CHECK-NEXT: ldr q0, [x9, lCPI17_0@PAGEOFF] |
| ; CHECK-NEXT: add x9, x0, #8 |
| ; CHECK-NEXT: Lloh21: |
| ; CHECK-NEXT: ldr q1, [x10, lCPI17_1@PAGEOFF] |
| ; CHECK-NEXT: LBB17_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x10, x1, x8 |
| ; CHECK-NEXT: add x8, x8, #128 |
| ; CHECK-NEXT: ldp d2, d3, [x9, #-8] |
| ; CHECK-NEXT: add x9, x9, #16 |
| ; CHECK-NEXT: cmp x8, #1024 |
| ; CHECK-NEXT: ldp q5, q4, [x10, #32] |
| ; CHECK-NEXT: tbl.16b v6, { v2 }, v1 |
| ; CHECK-NEXT: tbl.16b v2, { v2 }, v0 |
| ; CHECK-NEXT: tbl.16b v17, { v3 }, v0 |
| ; CHECK-NEXT: tbl.16b v3, { v3 }, v1 |
| ; CHECK-NEXT: ldp q16, q7, [x10] |
| ; CHECK-NEXT: uaddw2.2d v4, v4, v6 |
| ; CHECK-NEXT: uaddw.2d v5, v5, v6 |
| ; CHECK-NEXT: stp q5, q4, [x10, #32] |
| ; CHECK-NEXT: ldp q19, q18, [x10, #96] |
| ; CHECK-NEXT: uaddw2.2d v7, v7, v2 |
| ; CHECK-NEXT: uaddw.2d v2, v16, v2 |
| ; CHECK-NEXT: stp q2, q7, [x10] |
| ; CHECK-NEXT: ldp q6, q20, [x10, #64] |
| ; CHECK-NEXT: uaddw2.2d v4, v18, v3 |
| ; CHECK-NEXT: uaddw.2d v3, v19, v3 |
| ; CHECK-NEXT: stp q3, q4, [x10, #96] |
| ; CHECK-NEXT: uaddw2.2d v2, v20, v17 |
| ; CHECK-NEXT: uaddw.2d v4, v6, v17 |
| ; CHECK-NEXT: stp q4, q2, [x10, #64] |
| ; CHECK-NEXT: b.ne LBB17_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh21 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh20 |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i64_with_add_in_sequence_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x9, .LCPI17_0 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_0 |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] |
| ; CHECK-BE-NEXT: adrp x9, .LCPI17_1 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #8 |
| ; CHECK-BE-NEXT: .LBB17_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: sub x12, x9, #8 |
| ; CHECK-BE-NEXT: add x10, x1, x8 |
| ; CHECK-BE-NEXT: ld1 { v2.8b }, [x9] |
| ; CHECK-BE-NEXT: add x11, x10, #32 |
| ; CHECK-BE-NEXT: add x13, x10, #48 |
| ; CHECK-BE-NEXT: add x14, x10, #16 |
| ; CHECK-BE-NEXT: ld1 { v4.8b }, [x12] |
| ; CHECK-BE-NEXT: add x15, x10, #64 |
| ; CHECK-BE-NEXT: ld1 { v3.2d }, [x11] |
| ; CHECK-BE-NEXT: add x12, x10, #96 |
| ; CHECK-BE-NEXT: tbl v6.16b, { v2.16b }, v1.16b |
| ; CHECK-BE-NEXT: add x16, x10, #112 |
| ; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b |
| ; CHECK-BE-NEXT: ld1 { v7.2d }, [x13] |
| ; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v0.16b |
| ; CHECK-BE-NEXT: add x17, x10, #80 |
| ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v1.16b |
| ; CHECK-BE-NEXT: ld1 { v18.2d }, [x14] |
| ; CHECK-BE-NEXT: rev32 v17.8b, v6.8b |
| ; CHECK-BE-NEXT: add x8, x8, #128 |
| ; CHECK-BE-NEXT: ext v6.16b, v6.16b, v6.16b, #8 |
| ; CHECK-BE-NEXT: ld1 { v5.2d }, [x10] |
| ; CHECK-BE-NEXT: ext v23.16b, v16.16b, v16.16b, #8 |
| ; CHECK-BE-NEXT: add x9, x9, #16 |
| ; CHECK-BE-NEXT: ext v21.16b, v4.16b, v4.16b, #8 |
| ; CHECK-BE-NEXT: ld1 { v20.2d }, [x12] |
| ; CHECK-BE-NEXT: rev32 v4.8b, v4.8b |
| ; CHECK-BE-NEXT: cmp x8, #1024 |
| ; CHECK-BE-NEXT: ext v19.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ld1 { v22.2d }, [x15] |
| ; CHECK-BE-NEXT: rev32 v2.8b, v2.8b |
| ; CHECK-BE-NEXT: rev32 v21.8b, v21.8b |
| ; CHECK-BE-NEXT: ld1 { v24.2d }, [x16] |
| ; CHECK-BE-NEXT: uaddw v3.2d, v3.2d, v4.2s |
| ; CHECK-BE-NEXT: rev32 v4.8b, v23.8b |
| ; CHECK-BE-NEXT: ld1 { v23.2d }, [x17] |
| ; CHECK-BE-NEXT: rev32 v16.8b, v16.8b |
| ; CHECK-BE-NEXT: rev32 v6.8b, v6.8b |
| ; CHECK-BE-NEXT: rev32 v19.8b, v19.8b |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x11] |
| ; CHECK-BE-NEXT: uaddw v3.2d, v7.2d, v21.2s |
| ; CHECK-BE-NEXT: uaddw v4.2d, v18.2d, v4.2s |
| ; CHECK-BE-NEXT: uaddw v5.2d, v5.2d, v16.2s |
| ; CHECK-BE-NEXT: uaddw v7.2d, v20.2d, v17.2s |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x13] |
| ; CHECK-BE-NEXT: uaddw v2.2d, v22.2d, v2.2s |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x14] |
| ; CHECK-BE-NEXT: uaddw v3.2d, v24.2d, v6.2s |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x10] |
| ; CHECK-BE-NEXT: uaddw v4.2d, v23.2d, v19.2s |
| ; CHECK-BE-NEXT: st1 { v7.2d }, [x12] |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x15] |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x16] |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x17] |
| ; CHECK-BE-NEXT: b.ne .LBB17_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; First source vector at %src + %iv, second one 8 bytes after it. |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %src.gep.2 = getelementptr i8, ptr %src.gep, i64 8 |
| %load.2 = load <8 x i8>, ptr %src.gep.2 |
| %ext = zext <8 x i8> %load to <8 x i64> |
| %ext.2 = zext <8 x i8> %load.2 to <8 x i64> |
| ; Two accumulator vectors in %dst, the second one 8 i64 elements after the first. |
| %dst.gep = getelementptr i64, ptr %dst, i64 %iv |
| %load.dst = load <8 x i64>, ptr %dst.gep |
| %dst.gep.2 = getelementptr i64, ptr %dst.gep, i64 8 |
| %load.dst.2 = load <8 x i64>, ptr %dst.gep.2 |
| ; Add each extended vector to its accumulator and write the sums back in place. |
| %sum = add <8 x i64> %load.dst, %ext |
| %sum.2 = add <8 x i64> %load.dst.2, %ext.2 |
| store <8 x i64> %sum, ptr %dst.gep |
| store <8 x i64> %sum.2, ptr %dst.gep.2 |
| ; %iv advances by 16 and the loop exits once it reaches 128 (8 iterations). |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Two back-to-back zero-extensions of the same vector type in one loop body: |
| ; two adjacent <16 x i8> loads are each widened to <16 x i64> and stored. |
| ; The expectations below widen with ushll/ushll2 chains on both the |
| ; little-endian and the big-endian run; no tbl instruction is expected here. |
| define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: add x9, x1, #128 |
| ; CHECK-NEXT: LBB18_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x10, x0, x8 |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ldp q0, q1, [x10] |
| ; CHECK-NEXT: ushll.8h v2, v0, #0 |
| ; CHECK-NEXT: ushll2.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll.4s v4, v2, #0 |
| ; CHECK-NEXT: ushll2.4s v5, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v2, v2, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: ushll2.2d v6, v5, #0 |
| ; CHECK-NEXT: ushll.2d v5, v5, #0 |
| ; CHECK-NEXT: ushll2.8h v3, v1, #0 |
| ; CHECK-NEXT: ushll2.2d v7, v0, #0 |
| ; CHECK-NEXT: stp q5, q6, [x9, #-32] |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: ushll2.2d v5, v2, #0 |
| ; CHECK-NEXT: ushll.2d v2, v2, #0 |
| ; CHECK-NEXT: stp q0, q7, [x9, #-64] |
| ; CHECK-NEXT: ushll2.2d v0, v4, #0 |
| ; CHECK-NEXT: stp q2, q5, [x9, #-96] |
| ; CHECK-NEXT: ushll2.4s v5, v3, #0 |
| ; CHECK-NEXT: ushll.2d v2, v4, #0 |
| ; CHECK-NEXT: ushll2.2d v4, v5, #0 |
| ; CHECK-NEXT: stp q2, q0, [x9, #-128] |
| ; CHECK-NEXT: ushll.2d v0, v5, #0 |
| ; CHECK-NEXT: ushll.4s v2, v3, #0 |
| ; CHECK-NEXT: stp q0, q4, [x9, #96] |
| ; CHECK-NEXT: ushll.8h v0, v1, #0 |
| ; CHECK-NEXT: ushll2.2d v1, v2, #0 |
| ; CHECK-NEXT: ushll.2d v2, v2, #0 |
| ; CHECK-NEXT: ushll2.4s v3, v0, #0 |
| ; CHECK-NEXT: stp q2, q1, [x9, #64] |
| ; CHECK-NEXT: ushll2.2d v1, v3, #0 |
| ; CHECK-NEXT: ushll.2d v2, v3, #0 |
| ; CHECK-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-NEXT: stp q2, q1, [x9, #32] |
| ; CHECK-NEXT: ushll2.2d v1, v0, #0 |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: stp q0, q1, [x9], #128 |
| ; CHECK-NEXT: b.ne LBB18_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: add x9, x1, #128 |
| ; CHECK-BE-NEXT: .LBB18_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x10, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: add x11, x10, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x10] |
| ; CHECK-BE-NEXT: sub x10, x9, #16 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] |
| ; CHECK-BE-NEXT: sub x11, x9, #32 |
| ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0 |
| ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x10] |
| ; CHECK-BE-NEXT: sub x10, x9, #48 |
| ; CHECK-BE-NEXT: ushll2 v4.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x11] |
| ; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: sub x11, x9, #80 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x10] |
| ; CHECK-BE-NEXT: sub x10, x9, #64 |
| ; CHECK-BE-NEXT: ushll2 v5.2d, v2.4s, #0 |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x11] |
| ; CHECK-BE-NEXT: sub x11, x9, #96 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x10] |
| ; CHECK-BE-NEXT: ushll v4.8h, v3.8b, #0 |
| ; CHECK-BE-NEXT: sub x10, x9, #112 |
| ; CHECK-BE-NEXT: ushll2 v3.8h, v3.16b, #0 |
| ; CHECK-BE-NEXT: ushll v5.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v4.4h, #0 |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x10] |
| ; CHECK-BE-NEXT: ushll2 v6.4s, v3.8h, #0 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x11] |
| ; CHECK-BE-NEXT: sub x11, x9, #128 |
| ; CHECK-BE-NEXT: add x10, x9, #112 |
| ; CHECK-BE-NEXT: ushll v2.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: ushll2 v0.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: ushll2 v1.2d, v6.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x11] |
| ; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 |
| ; CHECK-BE-NEXT: add x11, x9, #96 |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x9, #80 |
| ; CHECK-BE-NEXT: ushll v5.2d, v6.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] |
| ; CHECK-BE-NEXT: ushll2 v1.4s, v4.8h, #0 |
| ; CHECK-BE-NEXT: ushll2 v4.2d, v3.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x11] |
| ; CHECK-BE-NEXT: add x11, x9, #48 |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x9, #64 |
| ; CHECK-BE-NEXT: ushll2 v5.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x11] |
| ; CHECK-BE-NEXT: add x11, x9, #16 |
| ; CHECK-BE-NEXT: st1 { v3.2d }, [x10] |
| ; CHECK-BE-NEXT: add x10, x9, #32 |
| ; CHECK-BE-NEXT: add x9, x9, #128 |
| ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: st1 { v0.2d }, [x11] |
| ; CHECK-BE-NEXT: st1 { v1.2d }, [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB18_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| ; %iv starts at 0, advances by 16 per iteration, and the loop exits once the |
| ; incremented value equals 128. |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <16 x i8>, ptr %src.gep |
| ; The second source vector starts 16 bytes after the first, i.e. at the |
| ; address the next iteration uses for its first load. |
| %src.gep.2 = getelementptr i8, ptr %src.gep, i64 16 |
| %load.2 = load <16 x i8>, ptr %src.gep.2 |
| ; The two widenings under test. |
| %ext = zext <16 x i8> %load to <16 x i64> |
| %ext.2 = zext <16 x i8> %load.2 to <16 x i64> |
| %dst.gep = getelementptr i64, ptr %dst, i64 %iv |
| store <16 x i64> %ext, ptr %dst.gep |
| ; The second result is stored 16 i64 elements after the first, i.e. at the |
| ; address the next iteration uses for its first store. |
| %dst.gep.2 = getelementptr i64, ptr %dst.gep, i64 16 |
| store <16 x i64> %ext.2, ptr %dst.gep.2 |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Scalable-vector variant: despite the v16 in the name, the loop loads a |
| ; <vscale x 16 x i8>, zero-extends it to <vscale x 16 x i32>, adds the result |
| ; to itself and stores it. Both runs expect the same SVE sequence: four ld1b |
| ; loads into .s elements (offsets 0 to 3, mul vl), four adds, and four st1w |
| ; stores; no tbl instruction is expected. |
| define void @zext_v16i8_to_v16i32_in_loop_scalable_vectors(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_scalable_vectors: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: ptrue p0.s |
| ; CHECK-NEXT: LBB19_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x9, x0, x8 |
| ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, x8] |
| ; CHECK-NEXT: ld1b { z1.s }, p0/z, [x9, #2, mul vl] |
| ; CHECK-NEXT: ld1b { z2.s }, p0/z, [x9, #3, mul vl] |
| ; CHECK-NEXT: ld1b { z3.s }, p0/z, [x9, #1, mul vl] |
| ; CHECK-NEXT: add z0.s, z0.s, z0.s |
| ; CHECK-NEXT: add x9, x1, x8, lsl #2 |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: add z1.s, z1.s, z1.s |
| ; CHECK-NEXT: add z0.s, z3.s, z3.s |
| ; CHECK-NEXT: add z2.s, z2.s, z2.s |
| ; CHECK-NEXT: st1w { z1.s }, p0, [x9, #2, mul vl] |
| ; CHECK-NEXT: st1w { z2.s }, p0, [x9, #3, mul vl] |
| ; CHECK-NEXT: st1w { z0.s }, p0, [x9, #1, mul vl] |
| ; CHECK-NEXT: b.ne LBB19_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_scalable_vectors: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: ptrue p0.s |
| ; CHECK-BE-NEXT: .LBB19_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: ld1b { z0.s }, p0/z, [x0, x8] |
| ; CHECK-BE-NEXT: ld1b { z1.s }, p0/z, [x9, #2, mul vl] |
| ; CHECK-BE-NEXT: ld1b { z2.s }, p0/z, [x9, #3, mul vl] |
| ; CHECK-BE-NEXT: ld1b { z3.s }, p0/z, [x9, #1, mul vl] |
| ; CHECK-BE-NEXT: add z0.s, z0.s, z0.s |
| ; CHECK-BE-NEXT: add x9, x1, x8, lsl #2 |
| ; CHECK-BE-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: add z1.s, z1.s, z1.s |
| ; CHECK-BE-NEXT: add z0.s, z3.s, z3.s |
| ; CHECK-BE-NEXT: add z2.s, z2.s, z2.s |
| ; CHECK-BE-NEXT: st1w { z1.s }, p0, [x9, #2, mul vl] |
| ; CHECK-BE-NEXT: st1w { z2.s }, p0, [x9, #3, mul vl] |
| ; CHECK-BE-NEXT: st1w { z0.s }, p0, [x9, #1, mul vl] |
| ; CHECK-BE-NEXT: b.ne .LBB19_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| ; %iv advances by a fixed 16 per iteration (independent of vscale) and the |
| ; loop exits once the incremented value equals 128. |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <vscale x 16 x i8>, ptr %src.gep |
| ; The widening under test; the add gives the extended value a user before |
| ; it is stored. |
| %ext = zext <vscale x 16 x i8> %load to <vscale x 16 x i32> |
| %add = add <vscale x 16 x i32> %ext, %ext |
| %dst.gep = getelementptr i32, ptr %dst, i64 %iv |
| store <vscale x 16 x i32> %add, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; CHECK-LABEL: lCPI20_0: |
| ; CHECK-NEXT: .byte 0 ; 0x0 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 1 ; 0x1 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 2 ; 0x2 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 3 ; 0x3 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 4 ; 0x4 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 5 ; 0x5 |
| ; CHECK-NEXT:lCPI20_1: |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 6 ; 0x6 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 7 ; 0x7 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 8 ; 0x8 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 9 ; 0x9 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 10 ; 0xa |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT:lCPI20_2: |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 11 ; 0xb |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 12 ; 0xc |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 13 ; 0xd |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 14 ; 0xe |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 15 ; 0xf |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT:lCPI20_3: |
| ; CHECK-NEXT: .byte 0 ; 0x0 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 1 ; 0x1 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 2 ; 0x2 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 3 ; 0x3 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| |
| ; CHECK-BE-LABEL: .LCPI20_0: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 0 // 0x0 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 1 // 0x1 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 2 // 0x2 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 3 // 0x3 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI20_1: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 0 // 0x0 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 1 // 0x1 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 2 // 0x2 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 3 // 0x3 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 4 // 0x4 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI20_2: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 5 // 0x5 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 6 // 0x6 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 7 // 0x7 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 8 // 0x8 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 9 // 0x9 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI20_3: |
| ; CHECK-BE-NEXT: .byte 10 // 0xa |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 11 // 0xb |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 12 // 0xc |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 13 // 0xd |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 14 // 0xe |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 15 // 0xf |
| |
| ; Non-power-of-two case: a <20 x i8> load is zero-extended to <20 x i24> and |
| ; stored inside a loop. Both runs expect four tbl index vectors to be loaded |
| ; from the constant pool (entries 20_0 through 20_3) ahead of the loop and |
| ; applied with tbl inside it. The byte contents of those constant-pool |
| ; entries are verified by the directives that precede this function. |
| define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v20i8_to_v20i24_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh22: |
| ; CHECK-NEXT: adrp x9, lCPI20_0@PAGE |
| ; CHECK-NEXT: Lloh23: |
| ; CHECK-NEXT: adrp x10, lCPI20_1@PAGE |
| ; CHECK-NEXT: Lloh24: |
| ; CHECK-NEXT: adrp x11, lCPI20_2@PAGE |
| ; CHECK-NEXT: Lloh25: |
| ; CHECK-NEXT: adrp x12, lCPI20_3@PAGE |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: Lloh26: |
| ; CHECK-NEXT: ldr q0, [x9, lCPI20_0@PAGEOFF] |
| ; CHECK-NEXT: Lloh27: |
| ; CHECK-NEXT: ldr q1, [x10, lCPI20_1@PAGEOFF] |
| ; CHECK-NEXT: Lloh28: |
| ; CHECK-NEXT: ldr q2, [x11, lCPI20_2@PAGEOFF] |
| ; CHECK-NEXT: Lloh29: |
| ; CHECK-NEXT: ldr q3, [x12, lCPI20_3@PAGEOFF] |
| ; CHECK-NEXT: LBB20_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x9, x0, x8 |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ldp q4, q5, [x9] |
| ; CHECK-NEXT: add x9, x1, #56 |
| ; CHECK-NEXT: tbl.16b v6, { v4 }, v2 |
| ; CHECK-NEXT: tbl.16b v7, { v4 }, v1 |
| ; CHECK-NEXT: tbl.16b v4, { v4 }, v0 |
| ; CHECK-NEXT: tbl.16b v5, { v5 }, v3 |
| ; CHECK-NEXT: stp q7, q6, [x1, #16] |
| ; CHECK-NEXT: str q4, [x1] |
| ; CHECK-NEXT: str d5, [x1, #48] |
| ; CHECK-NEXT: add x1, x1, #64 |
| ; CHECK-NEXT: st1.s { v5 }[2], [x9] |
| ; CHECK-NEXT: b.ne LBB20_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh25, Lloh29 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh28 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh26 |
| ; |
| ; CHECK-BE-LABEL: zext_v20i8_to_v20i24_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI20_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI20_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI20_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI20_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI20_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI20_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI20_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI20_3 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB20_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: add x10, x9, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v5.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #32 |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #56 |
| ; CHECK-BE-NEXT: tbl v6.16b, { v5.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v7.16b, { v5.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b |
| ; CHECK-BE-NEXT: tbl v5.16b, { v5.16b }, v2.16b |
| ; CHECK-BE-NEXT: st1 { v6.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #16 |
| ; CHECK-BE-NEXT: st1 { v7.16b }, [x1] |
| ; CHECK-BE-NEXT: rev64 v16.16b, v4.16b |
| ; CHECK-BE-NEXT: rev32 v4.16b, v4.16b |
| ; CHECK-BE-NEXT: st1 { v5.16b }, [x9] |
| ; CHECK-BE-NEXT: str d16, [x1, #48] |
| ; CHECK-BE-NEXT: add x1, x1, #64 |
| ; CHECK-BE-NEXT: st1 { v4.s }[2], [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB20_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| entry: |
| br label %loop |
| |
| ; %iv starts at 0, advances by 16 per iteration, and the loop exits once the |
| ; incremented value equals 128. |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <20 x i8>, ptr %src.gep |
| ; The widening under test. |
| %ext = zext <20 x i8> %load to <20 x i24> |
| ; The destination is indexed in i24 elements by %iv; the expected code above |
| ; advances the destination pointer by 64 bytes per iteration. |
| %dst.gep = getelementptr i24, ptr %dst, i64 %iv |
| store <20 x i24> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; CHECK-LABEL: lCPI21_0: |
| ; CHECK-NEXT: .byte 0 ; 0x0 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 1 ; 0x1 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 2 ; 0x2 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI21_1: |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 3 ; 0x3 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 4 ; 0x4 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 5 ; 0x5 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI21_2: |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 6 ; 0x6 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 7 ; 0x7 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI21_3: |
| ; CHECK-NEXT: .byte 8 ; 0x8 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 9 ; 0x9 |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 10 ; 0xa |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI21_4: |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 11 ; 0xb |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 12 ; 0xc |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 13 ; 0xd |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: lCPI21_5: |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 14 ; 0xe |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 15 ; 0xf |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| ; CHECK-NEXT: .byte 255 ; 0xff |
| |
| ; CHECK-BE-LABEL: .LCPI21_0: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 5 // 0x5 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 6 // 0x6 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI21_1: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 0 // 0x0 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 1 // 0x1 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI21_2: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 2 // 0x2 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 3 // 0x3 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 4 // 0x4 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI21_3: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 5 // 0x5 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 6 // 0x6 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 7 // 0x7 |
| ; CHECK-BE-NEXT: .LCPI21_4: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 8 // 0x8 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 9 // 0x9 |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI21_5: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 10 // 0xa |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 11 // 0xb |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 12 // 0xc |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .LCPI21_6: |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 13 // 0xd |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 14 // 0xe |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 255 // 0xff |
| ; CHECK-BE-NEXT: .byte 15 // 0xf |
| |
| ; Odd element count and odd destination width: a <23 x i8> load is |
| ; zero-extended to <23 x i48> and stored inside a loop. The little-endian run |
| ; expects six tbl index vectors from the constant pool (entries 21_0 through |
| ; 21_5); the big-endian run expects seven (entries 21_0 through 21_6). The |
| ; byte contents of those entries are verified by the directives that precede |
| ; this function. In both runs the bytes past the eighth 16-byte vector are |
| ; written with scalar-sized stores at offsets 128 and 136. |
| define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v23i8_to_v23i48_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh30: |
| ; CHECK-NEXT: adrp x9, lCPI21_0@PAGE |
| ; CHECK-NEXT: Lloh31: |
| ; CHECK-NEXT: adrp x10, lCPI21_1@PAGE |
| ; CHECK-NEXT: Lloh32: |
| ; CHECK-NEXT: adrp x11, lCPI21_2@PAGE |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: Lloh33: |
| ; CHECK-NEXT: ldr q0, [x9, lCPI21_0@PAGEOFF] |
| ; CHECK-NEXT: Lloh34: |
| ; CHECK-NEXT: adrp x9, lCPI21_3@PAGE |
| ; CHECK-NEXT: Lloh35: |
| ; CHECK-NEXT: ldr q1, [x10, lCPI21_1@PAGEOFF] |
| ; CHECK-NEXT: Lloh36: |
| ; CHECK-NEXT: adrp x10, lCPI21_4@PAGE |
| ; CHECK-NEXT: Lloh37: |
| ; CHECK-NEXT: ldr q2, [x11, lCPI21_2@PAGEOFF] |
| ; CHECK-NEXT: Lloh38: |
| ; CHECK-NEXT: adrp x11, lCPI21_5@PAGE |
| ; CHECK-NEXT: Lloh39: |
| ; CHECK-NEXT: ldr q3, [x9, lCPI21_3@PAGEOFF] |
| ; CHECK-NEXT: Lloh40: |
| ; CHECK-NEXT: ldr q4, [x10, lCPI21_4@PAGEOFF] |
| ; CHECK-NEXT: Lloh41: |
| ; CHECK-NEXT: ldr q5, [x11, lCPI21_5@PAGEOFF] |
| ; CHECK-NEXT: LBB21_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: add x9, x0, x8 |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: movi.2d v6, #0000000000000000 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: ldp q16, q7, [x9] |
| ; CHECK-NEXT: strh wzr, [x1, #136] |
| ; CHECK-NEXT: tbl.16b v18, { v16 }, v5 |
| ; CHECK-NEXT: tbl.16b v19, { v16 }, v4 |
| ; CHECK-NEXT: mov.b v6[4], v7[6] |
| ; CHECK-NEXT: tbl.16b v17, { v7 }, v1 |
| ; CHECK-NEXT: tbl.16b v7, { v7 }, v0 |
| ; CHECK-NEXT: tbl.16b v20, { v16 }, v3 |
| ; CHECK-NEXT: stp q19, q18, [x1, #64] |
| ; CHECK-NEXT: fmov x9, d6 |
| ; CHECK-NEXT: stp q7, q17, [x1, #96] |
| ; CHECK-NEXT: tbl.16b v17, { v16 }, v2 |
| ; CHECK-NEXT: tbl.16b v7, { v16 }, v1 |
| ; CHECK-NEXT: tbl.16b v16, { v16 }, v0 |
| ; CHECK-NEXT: stp q17, q20, [x1, #32] |
| ; CHECK-NEXT: stp q16, q7, [x1] |
| ; CHECK-NEXT: str x9, [x1, #128]! |
| ; CHECK-NEXT: b.ne LBB21_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh38, Lloh41 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh40 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh39 |
| ; CHECK-NEXT: .loh AdrpAdrp Lloh32, Lloh38 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh37 |
| ; CHECK-NEXT: .loh AdrpAdrp Lloh31, Lloh36 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh31, Lloh35 |
| ; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh34 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh33 |
| ; |
| ; CHECK-BE-LABEL: zext_v23i8_to_v23i48_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_3 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_4 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_4 |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_5 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_5 |
| ; CHECK-BE-NEXT: ld1 { v5.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI21_6 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_6 |
| ; CHECK-BE-NEXT: ld1 { v6.16b }, [x8] |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB21_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x11, x1, #64 |
| ; CHECK-BE-NEXT: add x10, x1, #80 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v7.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x9, #16 |
| ; CHECK-BE-NEXT: ld1 { v18.16b }, [x9] |
| ; CHECK-BE-NEXT: add x9, x1, #48 |
| ; CHECK-BE-NEXT: tbl v17.16b, { v7.16b }, v5.16b |
| ; CHECK-BE-NEXT: tbl v16.16b, { v7.16b }, v6.16b |
| ; CHECK-BE-NEXT: tbl v19.16b, { v7.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v20.16b, { v18.16b }, v0.16b |
| ; CHECK-BE-NEXT: st1 { v17.16b }, [x11] |
| ; CHECK-BE-NEXT: add x11, x1, #16 |
| ; CHECK-BE-NEXT: tbl v17.16b, { v7.16b }, v4.16b |
| ; CHECK-BE-NEXT: st1 { v16.16b }, [x10] |
| ; CHECK-BE-NEXT: add x10, x1, #32 |
| ; CHECK-BE-NEXT: tbl v16.16b, { v7.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v7.16b, { v7.16b }, v2.16b |
| ; CHECK-BE-NEXT: tbl v21.16b, { v18.16b }, v1.16b |
| ; CHECK-BE-NEXT: st1 { v17.16b }, [x9] |
| ; CHECK-BE-NEXT: tbl v17.16b, { v18.16b }, v2.16b |
| ; CHECK-BE-NEXT: add x9, x1, #112 |
| ; CHECK-BE-NEXT: rev64 v18.16b, v20.16b |
| ; CHECK-BE-NEXT: st1 { v19.16b }, [x10] |
| ; CHECK-BE-NEXT: rev16 v19.16b, v20.16b |
| ; CHECK-BE-NEXT: add x10, x1, #96 |
| ; CHECK-BE-NEXT: st1 { v7.16b }, [x11] |
| ; CHECK-BE-NEXT: add x11, x1, #136 |
| ; CHECK-BE-NEXT: st1 { v17.16b }, [x9] |
| ; CHECK-BE-NEXT: fmov x9, d18 |
| ; CHECK-BE-NEXT: st1 { v21.16b }, [x10] |
| ; CHECK-BE-NEXT: st1 { v19.h }[4], [x11] |
| ; CHECK-BE-NEXT: st1 { v16.16b }, [x1] |
| ; CHECK-BE-NEXT: str x9, [x1, #128]! |
| ; CHECK-BE-NEXT: b.ne .LBB21_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| |
| entry: |
| br label %loop |
| |
| ; %iv starts at 0, advances by 16 per iteration, and the loop exits once the |
| ; incremented value equals 128. |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <23 x i8>, ptr %src.gep |
| ; The widening under test. |
| %ext = zext <23 x i8> %load to <23 x i48> |
| ; The destination is indexed in i48 elements by %iv; the expected code above |
| ; advances the destination pointer by 128 bytes per iteration. |
| %dst.gep = getelementptr i48, ptr %dst, i64 %iv |
| store <23 x i48> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop that loads <8 x i8>, zero-extends it to <8 x i33> and stores the result. |
| ; i33 is not a power-of-two width: the expected output for both targets widens |
| ; with ushll and then packs the elements with scalar shifts and orrs; no tbl |
| ; instruction is expected. |
| define void @zext_v8i8_to_v8i33_in_loop(ptr %src, ptr %dst) { |
| ; CHECK-LABEL: zext_v8i8_to_v8i33_in_loop: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, xzr |
| ; CHECK-NEXT: LBB22_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr d0, [x0, x8] |
| ; CHECK-NEXT: add x8, x8, #16 |
| ; CHECK-NEXT: cmp x8, #128 |
| ; CHECK-NEXT: strb wzr, [x1, #32] |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: ushll.4s v1, v0, #0 |
| ; CHECK-NEXT: ushll2.4s v0, v0, #0 |
| ; CHECK-NEXT: ushll.2d v2, v1, #0 |
| ; CHECK-NEXT: ushll2.2d v3, v0, #0 |
| ; CHECK-NEXT: ushll2.2d v1, v1, #0 |
| ; CHECK-NEXT: mov.d x9, v3[1] |
| ; CHECK-NEXT: fmov x10, d3 |
| ; CHECK-NEXT: mov.d x12, v1[1] |
| ; CHECK-NEXT: ushll.2d v0, v0, #0 |
| ; CHECK-NEXT: lsl x9, x9, #39 |
| ; CHECK-NEXT: orr x9, x9, x10, lsl #6 |
| ; CHECK-NEXT: fmov x10, d1 |
| ; CHECK-NEXT: mov.d x11, v0[1] |
| ; CHECK-NEXT: lsl x12, x12, #35 |
| ; CHECK-NEXT: mov.d x14, v2[1] |
| ; CHECK-NEXT: fmov x13, d0 |
| ; CHECK-NEXT: orr x10, x12, x10, lsl #2 |
| ; CHECK-NEXT: fmov x12, d2 |
| ; CHECK-NEXT: lsl x11, x11, #37 |
| ; CHECK-NEXT: orr x11, x11, x13, lsl #4 |
| ; CHECK-NEXT: orr x12, x12, x14, lsl #33 |
| ; CHECK-NEXT: stp x11, x9, [x1, #16] |
| ; CHECK-NEXT: stp x12, x10, [x1], #128 |
| ; CHECK-NEXT: b.ne LBB22_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: zext_v8i8_to_v8i33_in_loop: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, xzr |
| ; CHECK-BE-NEXT: .LBB22_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: add x9, x0, x8 |
| ; CHECK-BE-NEXT: add x8, x8, #16 |
| ; CHECK-BE-NEXT: cmp x8, #128 |
| ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] |
| ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll v1.4s, v0.4h, #0 |
| ; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0 |
| ; CHECK-BE-NEXT: ushll v2.2d, v1.2s, #0 |
| ; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 |
| ; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 |
| ; CHECK-BE-NEXT: ushll2 v1.2d, v1.4s, #0 |
| ; CHECK-BE-NEXT: mov x9, v3.d[1] |
| ; CHECK-BE-NEXT: fmov x10, d3 |
| ; CHECK-BE-NEXT: mov x11, v0.d[1] |
| ; CHECK-BE-NEXT: fmov x12, d0 |
| ; CHECK-BE-NEXT: mov x13, v1.d[1] |
| ; CHECK-BE-NEXT: mov x14, v2.d[1] |
| ; CHECK-BE-NEXT: orr x10, x9, x10, lsl #33 |
| ; CHECK-BE-NEXT: fmov x15, d1 |
| ; CHECK-BE-NEXT: strb w9, [x1, #32] |
| ; CHECK-BE-NEXT: fmov x16, d2 |
| ; CHECK-BE-NEXT: lsl x11, x11, #2 |
| ; CHECK-BE-NEXT: lsl x13, x13, #4 |
| ; CHECK-BE-NEXT: orr x12, x11, x12, lsl #35 |
| ; CHECK-BE-NEXT: lsl x14, x14, #6 |
| ; CHECK-BE-NEXT: orr x15, x13, x15, lsl #37 |
| ; CHECK-BE-NEXT: extr x10, x11, x10, #8 |
| ; CHECK-BE-NEXT: orr x11, x14, x16, lsl #39 |
| ; CHECK-BE-NEXT: extr x12, x13, x12, #8 |
| ; CHECK-BE-NEXT: extr x9, x14, x15, #8 |
| ; CHECK-BE-NEXT: extr x11, xzr, x11, #8 |
| ; CHECK-BE-NEXT: stp x12, x10, [x1, #16] |
| ; CHECK-BE-NEXT: stp x11, x9, [x1], #128 |
| ; CHECK-BE-NEXT: b.ne .LBB22_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| |
| |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; The source is addressed in bytes; %iv steps by 16 although only 8 bytes are loaded. |
| %src.gep = getelementptr i8, ptr %src, i64 %iv |
| %load = load <8 x i8>, ptr %src.gep |
| %ext = zext <8 x i8> %load to <8 x i33> |
| ; The destination is indexed in units of i33 with the same %iv. |
| %dst.gep = getelementptr i33, ptr %dst, i64 %iv |
| store <8 x i33> %ext, ptr %dst.gep |
| %iv.next = add nuw i64 %iv, 16 |
| ; Exit once %iv.next reaches 128, i.e. after 8 iterations. |
| %ec = icmp eq i64 %iv.next, 128 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Sum-of-absolute-differences loop (the function name refers to PR62620): two |
| ; <16 x i8> loads are zero-extended to <16 x i32>, subtracted, passed through |
| ; llvm.abs, reduced with llvm.vector.reduce.add and accumulated in %s0. |
| ; NOTE(review): this test carried the comment "FIXME: Widening instructions |
| ; should be used instead of tbl.", but the expected output below already uses |
| ; the widening uabdl/uabal2 instructions and no tbl for both targets, so the |
| ; FIXME looks stale - confirm before dropping it. |
| define i32 @test_pr62620_widening_instr(ptr %p1, ptr %p2, i64 %lx, i32 %h) { |
| ; CHECK-LABEL: test_pr62620_widening_instr: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: lsl x9, x2, #4 |
| ; CHECK-NEXT: LBB23_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x8, x9] |
| ; CHECK-NEXT: subs w3, w3, #1 |
| ; CHECK-NEXT: ldr q1, [x1, x9] |
| ; CHECK-NEXT: uabdl.8h v2, v0, v1 |
| ; CHECK-NEXT: uabal2.8h v2, v0, v1 |
| ; CHECK-NEXT: uaddlv.8h s0, v2 |
| ; CHECK-NEXT: fmov w10, s0 |
| ; CHECK-NEXT: add w0, w10, w0 |
| ; CHECK-NEXT: b.ne LBB23_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_pr62620_widening_instr: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: mov x8, x0 |
| ; CHECK-BE-NEXT: lsl x9, x2, #4 |
| ; CHECK-BE-NEXT: mov w0, wzr |
| ; CHECK-BE-NEXT: add x8, x8, x9 |
| ; CHECK-BE-NEXT: add x9, x1, x9 |
| ; CHECK-BE-NEXT: .LBB23_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: subs w3, w3, #1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] |
| ; CHECK-BE-NEXT: uabdl v2.8h, v0.8b, v1.8b |
| ; CHECK-BE-NEXT: uabal2 v2.8h, v0.16b, v1.16b |
| ; CHECK-BE-NEXT: uaddlv s0, v2.8h |
| ; CHECK-BE-NEXT: fmov w10, s0 |
| ; CHECK-BE-NEXT: add w0, w10, w0 |
| ; CHECK-BE-NEXT: b.ne .LBB23_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| ; %s0 is the running sum; %j.0261 is the iteration counter. |
| %s0 = phi i32 [ 0, %entry ], [ %op.rdx, %loop ] |
| %j.0261 = phi i32 [ 0, %entry ], [ %inc, %loop ] |
| ; Both addresses are indexed by %lx only, so they do not change between iterations. |
| %gep.1 = getelementptr inbounds <16 x i8>, ptr %p1, i64 %lx |
| %gep.2 = getelementptr inbounds <16 x i8>, ptr %p2, i64 %lx |
| %l1 = load <16 x i8>, ptr %gep.1 |
| %z2 = zext <16 x i8> %l1 to <16 x i32> |
| %l4 = load <16 x i8>, ptr %gep.2 |
| %z5 = zext <16 x i8> %l4 to <16 x i32> |
| %sub = sub nsw <16 x i32> %z2, %z5 |
| %abs = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %sub, i1 true) |
| %red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %abs) |
| %op.rdx = add i32 %red, %s0 |
| %inc = add nuw nsw i32 %j.0261, 1 |
| ; The loop exits when the incremented counter equals %h. |
| %exitcond.not = icmp eq i32 %inc, %h |
| br i1 %exitcond.not, label %exit, label %loop |
| |
| exit: |
| %s1 = phi i32 [ %op.rdx, %loop ] |
| ret i32 %s1 |
| } |
| |
| ; Intrinsics called by @test_pr62620_widening_instr. |
| declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg) |
| |
| declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) |
| |
| ; Multiply of a zero-extended <16 x i16> load with a zero-extended <16 x i8> |
| ; load, both widened to <16 x i32>; the product is stored back through %gep.1 |
| ; and the function always returns 0. The expected output for both targets |
| ; extends the i8 operand with ushll/ushll2 and multiplies with umull/umull2; |
| ; no tbl instruction is expected. |
| define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) { |
| ; CHECK-LABEL: test_widening_instr_mull: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: LBB24_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q0, [x1], #16 |
| ; CHECK-NEXT: ldr q1, [x0] |
| ; CHECK-NEXT: subs w2, w2, #1 |
| ; CHECK-NEXT: ldr q2, [x8, #16]! |
| ; CHECK-NEXT: ushll2.8h v3, v0, #0 |
| ; CHECK-NEXT: ushll.8h v0, v0, #0 |
| ; CHECK-NEXT: umull2.4s v4, v2, v3 |
| ; CHECK-NEXT: umull.4s v2, v2, v3 |
| ; CHECK-NEXT: umull.4s v3, v1, v0 |
| ; CHECK-NEXT: umull2.4s v0, v1, v0 |
| ; CHECK-NEXT: stp q2, q4, [x0, #32] |
| ; CHECK-NEXT: str q3, [x0] |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: str q0, [x8] |
| ; CHECK-NEXT: b.ne LBB24_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: test_widening_instr_mull: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: .LBB24_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x1] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: add x9, x0, #48 |
| ; CHECK-BE-NEXT: add x10, x0, #32 |
| ; CHECK-BE-NEXT: ld1 { v1.8h }, [x0] |
| ; CHECK-BE-NEXT: subs w2, w2, #1 |
| ; CHECK-BE-NEXT: add x1, x1, #16 |
| ; CHECK-BE-NEXT: ld1 { v4.8h }, [x8] |
| ; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0 |
| ; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0 |
| ; CHECK-BE-NEXT: umull v3.4s, v1.4h, v2.4h |
| ; CHECK-BE-NEXT: umull2 v1.4s, v1.8h, v2.8h |
| ; CHECK-BE-NEXT: umull2 v2.4s, v4.8h, v0.8h |
| ; CHECK-BE-NEXT: umull v0.4s, v4.4h, v0.4h |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x0] |
| ; CHECK-BE-NEXT: mov x0, x8 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x10] |
| ; CHECK-BE-NEXT: b.ne .LBB24_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: mov w0, wzr |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| ; Both pointers advance by one <16 x i8> (16 bytes) per iteration, which is |
| ; smaller than the 32-byte load and the 64-byte store done through %gep.1. |
| %gep.1 = getelementptr inbounds <16 x i8>, ptr %p1, i32 %iv |
| %gep.2 = getelementptr inbounds <16 x i8>, ptr %p2, i32 %iv |
| %l1 = load <16 x i16>, ptr %gep.1 |
| %z2 = zext <16 x i16> %l1 to <16 x i32> |
| %l4 = load <16 x i8>, ptr %gep.2 |
| %z5 = zext <16 x i8> %l4 to <16 x i32> |
| %mul = mul <16 x i32> %z2, %z5 |
| store <16 x i32> %mul, ptr %gep.1 |
| %iv.next= add nuw nsw i32 %iv, 1 |
| ; The loop exits when the incremented counter equals %h. |
| %exitcond.not = icmp eq i32 %iv.next, %h |
| br i1 %exitcond.not, label %exit, label %loop |
| |
| exit: |
| ret i32 0 |
| } |
| |
| ; 64-bit variant: a <16 x i8> load zero-extended to <16 x i64> is multiplied by |
| ; a <16 x i32> load zero-extended to <16 x i64>; the product is stored back |
| ; through %gep.1 and the function always returns 0. The expected output for |
| ; both targets loads four shuffle masks from the constant pool before the loop, |
| ; applies them with tbl to the i8 vector, and multiplies with umull (plus |
| ; umull2 on the little-endian target). |
| define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) { |
| ; CHECK-LABEL: test_widening_instr_mull_64: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh42: |
| ; CHECK-NEXT: adrp x8, lCPI25_0@PAGE |
| ; CHECK-NEXT: Lloh43: |
| ; CHECK-NEXT: adrp x9, lCPI25_1@PAGE |
| ; CHECK-NEXT: Lloh44: |
| ; CHECK-NEXT: adrp x10, lCPI25_2@PAGE |
| ; CHECK-NEXT: Lloh45: |
| ; CHECK-NEXT: adrp x11, lCPI25_3@PAGE |
| ; CHECK-NEXT: Lloh46: |
| ; CHECK-NEXT: ldr q0, [x8, lCPI25_0@PAGEOFF] |
| ; CHECK-NEXT: mov x8, x1 |
| ; CHECK-NEXT: Lloh47: |
| ; CHECK-NEXT: ldr q1, [x9, lCPI25_1@PAGEOFF] |
| ; CHECK-NEXT: Lloh48: |
| ; CHECK-NEXT: ldr q2, [x10, lCPI25_2@PAGEOFF] |
| ; CHECK-NEXT: Lloh49: |
| ; CHECK-NEXT: ldr q3, [x11, lCPI25_3@PAGEOFF] |
| ; CHECK-NEXT: LBB25_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q4, [x0] |
| ; CHECK-NEXT: subs w2, w2, #1 |
| ; CHECK-NEXT: ldp q7, q17, [x1, #32] |
| ; CHECK-NEXT: tbl.16b v16, { v4 }, v3 |
| ; CHECK-NEXT: tbl.16b v18, { v4 }, v0 |
| ; CHECK-NEXT: tbl.16b v19, { v4 }, v1 |
| ; CHECK-NEXT: tbl.16b v4, { v4 }, v2 |
| ; CHECK-NEXT: ldr q5, [x1] |
| ; CHECK-NEXT: ldr q6, [x8, #16]! |
| ; CHECK-NEXT: umull2.2d v20, v16, v17 |
| ; CHECK-NEXT: mov x1, x8 |
| ; CHECK-NEXT: umull2.2d v21, v18, v7 |
| ; CHECK-NEXT: umull.2d v16, v16, v17 |
| ; CHECK-NEXT: umull2.2d v17, v4, v6 |
| ; CHECK-NEXT: umull.2d v4, v4, v6 |
| ; CHECK-NEXT: umull2.2d v6, v19, v5 |
| ; CHECK-NEXT: str q21, [x0, #80] |
| ; CHECK-NEXT: umull.2d v5, v19, v5 |
| ; CHECK-NEXT: stp q16, q20, [x0, #96] |
| ; CHECK-NEXT: umull.2d v7, v18, v7 |
| ; CHECK-NEXT: stp q4, q17, [x0, #32] |
| ; CHECK-NEXT: stp q5, q6, [x0] |
| ; CHECK-NEXT: str q7, [x0, #64]! |
| ; CHECK-NEXT: b.ne LBB25_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh45, Lloh49 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh44, Lloh48 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh43, Lloh47 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh42, Lloh46 |
| ; |
| ; CHECK-BE-LABEL: test_widening_instr_mull_64: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI25_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI25_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI25_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI25_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI25_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI25_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI25_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI25_3 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: .LBB25_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x0] |
| ; CHECK-BE-NEXT: add x8, x1, #48 |
| ; CHECK-BE-NEXT: add x9, x1, #32 |
| ; CHECK-BE-NEXT: subs w2, w2, #1 |
| ; CHECK-BE-NEXT: ld1 { v5.4s }, [x1] |
| ; CHECK-BE-NEXT: add x1, x1, #16 |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #96 |
| ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v18.16b, { v4.16b }, v1.16b |
| ; CHECK-BE-NEXT: ld1 { v17.4s }, [x1] |
| ; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v0.16b |
| ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v19.4s }, [x9] |
| ; CHECK-BE-NEXT: rev32 v20.8b, v7.8b |
| ; CHECK-BE-NEXT: add x9, x0, #32 |
| ; CHECK-BE-NEXT: ext v23.16b, v6.16b, v6.16b, #8 |
| ; CHECK-BE-NEXT: rev32 v22.8b, v18.8b |
| ; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8 |
| ; CHECK-BE-NEXT: ext v24.16b, v4.16b, v4.16b, #8 |
| ; CHECK-BE-NEXT: umull v6.2d, v20.2s, v6.2s |
| ; CHECK-BE-NEXT: umull v20.2d, v22.2s, v17.2s |
| ; CHECK-BE-NEXT: ext v22.16b, v19.16b, v19.16b, #8 |
| ; CHECK-BE-NEXT: ext v21.16b, v5.16b, v5.16b, #8 |
| ; CHECK-BE-NEXT: st1 { v6.2d }, [x8] |
| ; CHECK-BE-NEXT: rev32 v6.8b, v7.8b |
| ; CHECK-BE-NEXT: ext v7.16b, v18.16b, v18.16b, #8 |
| ; CHECK-BE-NEXT: rev32 v18.8b, v16.8b |
| ; CHECK-BE-NEXT: ext v16.16b, v16.16b, v16.16b, #8 |
| ; CHECK-BE-NEXT: add x8, x0, #112 |
| ; CHECK-BE-NEXT: st1 { v20.2d }, [x9] |
| ; CHECK-BE-NEXT: rev32 v20.8b, v24.8b |
| ; CHECK-BE-NEXT: umull v6.2d, v6.2s, v23.2s |
| ; CHECK-BE-NEXT: rev32 v4.8b, v4.8b |
| ; CHECK-BE-NEXT: umull v5.2d, v18.2s, v5.2s |
| ; CHECK-BE-NEXT: add x9, x0, #80 |
| ; CHECK-BE-NEXT: ext v17.16b, v17.16b, v17.16b, #8 |
| ; CHECK-BE-NEXT: umull v18.2d, v20.2s, v22.2s |
| ; CHECK-BE-NEXT: st1 { v6.2d }, [x8] |
| ; CHECK-BE-NEXT: rev32 v6.8b, v7.8b |
| ; CHECK-BE-NEXT: rev32 v7.8b, v16.8b |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x0] |
| ; CHECK-BE-NEXT: umull v4.2d, v4.2s, v19.2s |
| ; CHECK-BE-NEXT: add x8, x0, #48 |
| ; CHECK-BE-NEXT: st1 { v18.2d }, [x9] |
| ; CHECK-BE-NEXT: add x9, x0, #16 |
| ; CHECK-BE-NEXT: umull v5.2d, v6.2s, v17.2s |
| ; CHECK-BE-NEXT: add x0, x0, #64 |
| ; CHECK-BE-NEXT: umull v6.2d, v7.2s, v21.2s |
| ; CHECK-BE-NEXT: st1 { v4.2d }, [x0] |
| ; CHECK-BE-NEXT: st1 { v5.2d }, [x8] |
| ; CHECK-BE-NEXT: st1 { v6.2d }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB25_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: mov w0, wzr |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| ; %gep.1 steps by one <16 x i32> (64 bytes) and %gep.2 by one <16 x i8> |
| ; (16 bytes) per iteration; the i8 vector is loaded from %gep.1 and the i32 |
| ; vector from %gep.2, so the GEP element types do not match the loaded types. |
| %gep.1 = getelementptr inbounds <16 x i32>, ptr %p1, i32 %iv |
| %gep.2 = getelementptr inbounds <16 x i8>, ptr %p2, i32 %iv |
| %l1 = load <16 x i8>, ptr %gep.1 |
| %z2 = zext <16 x i8> %l1 to <16 x i64> |
| %l4 = load <16 x i32>, ptr %gep.2 |
| %z5 = zext <16 x i32> %l4 to <16 x i64> |
| %mul = mul <16 x i64> %z2, %z5 |
| store <16 x i64> %mul, ptr %gep.1 |
| %iv.next= add nuw nsw i32 %iv, 1 |
| ; The loop exits when the incremented counter equals %h. |
| %exitcond.not = icmp eq i32 %iv.next, %h |
| br i1 %exitcond.not, label %exit, label %loop |
| |
| exit: |
| ret i32 0 |
| } |
| |
| ; Multiply in which only one operand is extended: a <16 x i32> load is |
| ; multiplied by a <16 x i8> load zero-extended to <16 x i32>; the product is |
| ; stored back through %gep.1 and the function always returns 0. The expected |
| ; output for both targets performs the extension with four tbl shuffles (masks |
| ; loaded from the constant pool before the loop) and uses a plain vector mul |
| ; rather than a widening multiply. |
| define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 %h) { |
| ; CHECK-LABEL: test_widening_instr_mull_2: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: Lloh50: |
| ; CHECK-NEXT: adrp x8, lCPI26_0@PAGE |
| ; CHECK-NEXT: Lloh51: |
| ; CHECK-NEXT: adrp x9, lCPI26_1@PAGE |
| ; CHECK-NEXT: Lloh52: |
| ; CHECK-NEXT: adrp x10, lCPI26_2@PAGE |
| ; CHECK-NEXT: Lloh53: |
| ; CHECK-NEXT: adrp x11, lCPI26_3@PAGE |
| ; CHECK-NEXT: Lloh54: |
| ; CHECK-NEXT: ldr q0, [x8, lCPI26_0@PAGEOFF] |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: Lloh55: |
| ; CHECK-NEXT: ldr q1, [x9, lCPI26_1@PAGEOFF] |
| ; CHECK-NEXT: Lloh56: |
| ; CHECK-NEXT: ldr q2, [x10, lCPI26_2@PAGEOFF] |
| ; CHECK-NEXT: Lloh57: |
| ; CHECK-NEXT: ldr q3, [x11, lCPI26_3@PAGEOFF] |
| ; CHECK-NEXT: LBB26_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldr q4, [x1], #16 |
| ; CHECK-NEXT: ldp q5, q6, [x0, #32] |
| ; CHECK-NEXT: subs w2, w2, #1 |
| ; CHECK-NEXT: tbl.16b v16, { v4 }, v0 |
| ; CHECK-NEXT: tbl.16b v18, { v4 }, v1 |
| ; CHECK-NEXT: tbl.16b v19, { v4 }, v2 |
| ; CHECK-NEXT: tbl.16b v4, { v4 }, v3 |
| ; CHECK-NEXT: ldr q7, [x0] |
| ; CHECK-NEXT: ldr q17, [x8, #16]! |
| ; CHECK-NEXT: mul.4s v5, v5, v16 |
| ; CHECK-NEXT: mul.4s v6, v6, v18 |
| ; CHECK-NEXT: mul.4s v7, v7, v19 |
| ; CHECK-NEXT: mul.4s v4, v17, v4 |
| ; CHECK-NEXT: stp q5, q6, [x0, #32] |
| ; CHECK-NEXT: str q7, [x0] |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: str q4, [x8] |
| ; CHECK-NEXT: b.ne LBB26_1 |
| ; CHECK-NEXT: ; %bb.2: ; %exit |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh53, Lloh57 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh52, Lloh56 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh51, Lloh55 |
| ; CHECK-NEXT: .loh AdrpLdr Lloh50, Lloh54 |
| ; |
| ; CHECK-BE-LABEL: test_widening_instr_mull_2: |
| ; CHECK-BE: // %bb.0: // %entry |
| ; CHECK-BE-NEXT: adrp x8, .LCPI26_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_0 |
| ; CHECK-BE-NEXT: ld1 { v0.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI26_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_1 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI26_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI26_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI26_3 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: .LBB26_1: // %loop |
| ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x1] |
| ; CHECK-BE-NEXT: add x8, x0, #32 |
| ; CHECK-BE-NEXT: add x9, x0, #48 |
| ; CHECK-BE-NEXT: add x10, x0, #16 |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x0] |
| ; CHECK-BE-NEXT: subs w2, w2, #1 |
| ; CHECK-BE-NEXT: add x1, x1, #16 |
| ; CHECK-BE-NEXT: ld1 { v16.4s }, [x8] |
| ; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v0.16b |
| ; CHECK-BE-NEXT: ld1 { v18.4s }, [x10] |
| ; CHECK-BE-NEXT: tbl v17.16b, { v4.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b |
| ; CHECK-BE-NEXT: rev32 v5.16b, v5.16b |
| ; CHECK-BE-NEXT: rev32 v7.16b, v7.16b |
| ; CHECK-BE-NEXT: rev32 v17.16b, v17.16b |
| ; CHECK-BE-NEXT: rev32 v4.16b, v4.16b |
| ; CHECK-BE-NEXT: mul v5.4s, v6.4s, v5.4s |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x9] |
| ; CHECK-BE-NEXT: mul v7.4s, v18.4s, v7.4s |
| ; CHECK-BE-NEXT: st1 { v5.4s }, [x0] |
| ; CHECK-BE-NEXT: mov x0, x10 |
| ; CHECK-BE-NEXT: mul v5.4s, v16.4s, v17.4s |
| ; CHECK-BE-NEXT: st1 { v7.4s }, [x10] |
| ; CHECK-BE-NEXT: mul v4.4s, v6.4s, v4.4s |
| ; CHECK-BE-NEXT: st1 { v5.4s }, [x8] |
| ; CHECK-BE-NEXT: st1 { v4.4s }, [x9] |
| ; CHECK-BE-NEXT: b.ne .LBB26_1 |
| ; CHECK-BE-NEXT: // %bb.2: // %exit |
| ; CHECK-BE-NEXT: mov w0, wzr |
| ; CHECK-BE-NEXT: ret |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| ; Both pointers advance by one <16 x i8> (16 bytes) per iteration, which is |
| ; smaller than the 64-byte load and store done through %gep.1. |
| %gep.1 = getelementptr inbounds <16 x i8>, ptr %p1, i32 %iv |
| %gep.2 = getelementptr inbounds <16 x i8>, ptr %p2, i32 %iv |
| %l1 = load <16 x i32>, ptr %gep.1 |
| %l4 = load <16 x i8>, ptr %gep.2 |
| %z5 = zext <16 x i8> %l4 to <16 x i32> |
| %mul = mul <16 x i32> %l1, %z5 |
| store <16 x i32> %mul, ptr %gep.1 |
| %iv.next= add nuw nsw i32 %iv, 1 |
| ; The loop exits when the incremented counter equals %h. |
| %exitcond.not = icmp eq i32 %iv.next, %h |
| br i1 %exitcond.not, label %exit, label %loop |
| |
| exit: |
| ret i32 0 |
| } |