| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s |
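;
; The CHECK lines below reflect the generic (non-SVE) expansion of
; @llvm.experimental.vector.compress for this triple: each selected element is
; stored to a stack temporary at an offset accumulated from the mask bits, and
; the compressed result is reloaded from the stack slot.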
| |
| define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_v4i32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str s0, [sp] |
| ; CHECK-NEXT: shl.4s v1, v1, #31 |
| ; CHECK-NEXT: cmlt.4s v1, v1, #0 |
| ; CHECK-NEXT: mov.s w9, v1[1] |
| ; CHECK-NEXT: mov.s w10, v1[2] |
| ; CHECK-NEXT: fmov w11, s1 |
| ; CHECK-NEXT: bfi x8, x11, #2, #1 |
| ; CHECK-NEXT: and x11, x11, #0x1 |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: and w10, w10, #0x1 |
| ; CHECK-NEXT: add x9, x11, x9 |
| ; CHECK-NEXT: mov x11, sp |
| ; CHECK-NEXT: st1.s { v0 }[1], [x8] |
| ; CHECK-NEXT: add w10, w9, w10 |
| ; CHECK-NEXT: orr x9, x11, x9, lsl #2 |
| ; CHECK-NEXT: bfi x11, x10, #2, #2 |
| ; CHECK-NEXT: st1.s { v0 }[2], [x9] |
| ; CHECK-NEXT: st1.s { v0 }[3], [x11] |
| ; CHECK-NEXT: ldr q0, [sp], #16 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| |
| |
| define <4 x i32> @test_compress_v4i32_with_passthru(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> %passthru) { |
| ; CHECK-LABEL: test_compress_v4i32_with_passthru: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: str q2, [sp, #-16]! |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: movi.4s v3, #1 |
| ; CHECK-NEXT: mov x12, sp |
| ; CHECK-NEXT: mov x10, sp |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: mov x14, sp |
| ; CHECK-NEXT: mov w15, #3 ; =0x3 |
| ; CHECK-NEXT: shl.4s v1, v1, #31 |
| ; CHECK-NEXT: cmlt.4s v1, v1, #0 |
| ; CHECK-NEXT: and.16b v3, v1, v3 |
| ; CHECK-NEXT: mov.s w8, v1[1] |
| ; CHECK-NEXT: fmov w16, s1 |
| ; CHECK-NEXT: mov.s w11, v1[2] |
| ; CHECK-NEXT: mov.s w13, v1[3] |
| ; CHECK-NEXT: addv.4s s2, v3 |
| ; CHECK-NEXT: bfi x12, x16, #2, #1 |
| ; CHECK-NEXT: and x16, x16, #0x1 |
| ; CHECK-NEXT: and x8, x8, #0x1 |
| ; CHECK-NEXT: add x8, x16, x8 |
| ; CHECK-NEXT: and x11, x11, #0x1 |
| ; CHECK-NEXT: and x13, x13, #0x1 |
| ; CHECK-NEXT: fmov w16, s2 |
| ; CHECK-NEXT: add x11, x8, x11 |
| ; CHECK-NEXT: orr x8, x9, x8, lsl #2 |
| ; CHECK-NEXT: add x13, x11, x13 |
| ; CHECK-NEXT: bfi x14, x11, #2, #2 |
| ; CHECK-NEXT: cmp x13, #3 |
| ; CHECK-NEXT: bfi x10, x16, #2, #2 |
| ; CHECK-NEXT: mov.s w16, v0[3] |
| ; CHECK-NEXT: csel x11, x13, x15, lo |
| ; CHECK-NEXT: ldr w10, [x10] |
| ; CHECK-NEXT: str s0, [sp] |
| ; CHECK-NEXT: st1.s { v0 }[1], [x12] |
| ; CHECK-NEXT: st1.s { v0 }[2], [x8] |
| ; CHECK-NEXT: orr x8, x9, x11, lsl #2 |
| ; CHECK-NEXT: csel w9, w16, w10, hi |
| ; CHECK-NEXT: st1.s { v0 }[3], [x14] |
| ; CHECK-NEXT: str w9, [x8] |
| ; CHECK-NEXT: ldr q0, [sp], #16 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> %passthru) |
| ret <4 x i32> %out |
| } |
| |
| define <2 x double> @test_compress_v2f64(<2 x double> %vec, <2 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_v2f64: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: ushll.2d v1, v1, #0 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str d0, [sp] |
| ; CHECK-NEXT: shl.2d v1, v1, #63 |
| ; CHECK-NEXT: cmlt.2d v1, v1, #0 |
| ; CHECK-NEXT: fmov x9, d1 |
| ; CHECK-NEXT: bfi x8, x9, #3, #1 |
| ; CHECK-NEXT: st1.d { v0 }[1], [x8] |
| ; CHECK-NEXT: ldr q0, [sp], #16 |
| ; CHECK-NEXT: ret |
| %out = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %vec, <2 x i1> %mask, <2 x double> undef) |
| ret <2 x double> %out |
| } |
| |
| define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_v16i8: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: shl.16b v1, v1, #7 |
| ; CHECK-NEXT: mov x12, sp |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: str b0, [sp] |
| ; CHECK-NEXT: mov x13, sp |
| ; CHECK-NEXT: cmlt.16b v1, v1, #0 |
| ; CHECK-NEXT: umov.b w9, v1[0] |
| ; CHECK-NEXT: umov.b w10, v1[1] |
| ; CHECK-NEXT: umov.b w11, v1[2] |
| ; CHECK-NEXT: umov.b w14, v1[3] |
| ; CHECK-NEXT: bfxil x12, x9, #0, #1 |
| ; CHECK-NEXT: and x10, x10, #0x1 |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: add x9, x9, x10 |
| ; CHECK-NEXT: umov.b w10, v1[4] |
| ; CHECK-NEXT: and x11, x11, #0x1 |
| ; CHECK-NEXT: st1.b { v0 }[1], [x12] |
| ; CHECK-NEXT: orr x12, x8, x9 |
| ; CHECK-NEXT: add x9, x9, x11 |
| ; CHECK-NEXT: umov.b w11, v1[5] |
| ; CHECK-NEXT: and x14, x14, #0x1 |
| ; CHECK-NEXT: st1.b { v0 }[2], [x12] |
| ; CHECK-NEXT: add x14, x9, x14 |
| ; CHECK-NEXT: umov.b w12, v1[6] |
| ; CHECK-NEXT: orr x9, x8, x9 |
| ; CHECK-NEXT: and x10, x10, #0x1 |
| ; CHECK-NEXT: st1.b { v0 }[3], [x9] |
| ; CHECK-NEXT: orr x9, x8, x14 |
| ; CHECK-NEXT: add x10, x14, x10 |
| ; CHECK-NEXT: umov.b w14, v1[7] |
| ; CHECK-NEXT: st1.b { v0 }[4], [x9] |
| ; CHECK-NEXT: and x11, x11, #0x1 |
| ; CHECK-NEXT: bfxil x13, x10, #0, #4 |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: add x10, x10, x11 |
| ; CHECK-NEXT: umov.b w11, v1[8] |
| ; CHECK-NEXT: and x12, x12, #0x1 |
| ; CHECK-NEXT: bfxil x9, x10, #0, #4 |
| ; CHECK-NEXT: st1.b { v0 }[5], [x13] |
| ; CHECK-NEXT: umov.b w13, v1[9] |
| ; CHECK-NEXT: add x10, x10, x12 |
| ; CHECK-NEXT: mov x12, sp |
| ; CHECK-NEXT: and x14, x14, #0x1 |
| ; CHECK-NEXT: st1.b { v0 }[6], [x9] |
| ; CHECK-NEXT: umov.b w9, v1[10] |
| ; CHECK-NEXT: bfxil x12, x10, #0, #4 |
| ; CHECK-NEXT: add x10, x10, x14 |
| ; CHECK-NEXT: mov x14, sp |
| ; CHECK-NEXT: and x11, x11, #0x1 |
| ; CHECK-NEXT: bfxil x14, x10, #0, #4 |
| ; CHECK-NEXT: add x10, x10, x11 |
| ; CHECK-NEXT: mov x11, sp |
| ; CHECK-NEXT: and x13, x13, #0x1 |
| ; CHECK-NEXT: st1.b { v0 }[7], [x12] |
| ; CHECK-NEXT: mov x12, sp |
| ; CHECK-NEXT: bfxil x11, x10, #0, #4 |
| ; CHECK-NEXT: add x10, x10, x13 |
| ; CHECK-NEXT: umov.b w13, v1[11] |
| ; CHECK-NEXT: st1.b { v0 }[8], [x14] |
| ; CHECK-NEXT: umov.b w14, v1[12] |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: bfxil x12, x10, #0, #4 |
| ; CHECK-NEXT: add x9, x10, x9 |
| ; CHECK-NEXT: mov x10, sp |
| ; CHECK-NEXT: st1.b { v0 }[9], [x11] |
| ; CHECK-NEXT: umov.b w11, v1[13] |
| ; CHECK-NEXT: bfxil x10, x9, #0, #4 |
| ; CHECK-NEXT: st1.b { v0 }[10], [x12] |
| ; CHECK-NEXT: umov.b w12, v1[14] |
| ; CHECK-NEXT: and x13, x13, #0x1 |
| ; CHECK-NEXT: and x14, x14, #0x1 |
| ; CHECK-NEXT: add x9, x9, x13 |
| ; CHECK-NEXT: st1.b { v0 }[11], [x10] |
| ; CHECK-NEXT: mov x10, sp |
| ; CHECK-NEXT: add x13, x9, x14 |
| ; CHECK-NEXT: mov x14, sp |
| ; CHECK-NEXT: bfxil x10, x9, #0, #4 |
| ; CHECK-NEXT: and x9, x11, #0x1 |
| ; CHECK-NEXT: mov x11, sp |
| ; CHECK-NEXT: add x9, x13, x9 |
| ; CHECK-NEXT: and w12, w12, #0x1 |
| ; CHECK-NEXT: bfxil x14, x13, #0, #4 |
| ; CHECK-NEXT: bfxil x11, x9, #0, #4 |
| ; CHECK-NEXT: add w9, w9, w12 |
| ; CHECK-NEXT: st1.b { v0 }[12], [x10] |
| ; CHECK-NEXT: bfxil x8, x9, #0, #4 |
| ; CHECK-NEXT: st1.b { v0 }[13], [x14] |
| ; CHECK-NEXT: st1.b { v0 }[14], [x11] |
| ; CHECK-NEXT: st1.b { v0 }[15], [x8] |
| ; CHECK-NEXT: ldr q0, [sp], #16 |
| ; CHECK-NEXT: ret |
| %out = call <16 x i8> @llvm.experimental.vector.compress(<16 x i8> %vec, <16 x i1> %mask, <16 x i8> undef) |
| ret <16 x i8> %out |
| } |
| |
| define <8 x i32> @test_compress_large(<8 x i32> %vec, <8 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_large: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: ; kill: def $d2 killed $d2 def $q2 |
| ; CHECK-NEXT: umov.b w9, v2[0] |
| ; CHECK-NEXT: umov.b w10, v2[1] |
| ; CHECK-NEXT: mov x11, sp |
| ; CHECK-NEXT: umov.b w12, v2[2] |
| ; CHECK-NEXT: umov.b w13, v2[3] |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: umov.b w15, v2[4] |
| ; CHECK-NEXT: str s0, [sp] |
| ; CHECK-NEXT: mov s3, v0[3] |
| ; CHECK-NEXT: and x10, x10, #0x1 |
| ; CHECK-NEXT: and x14, x9, #0x1 |
| ; CHECK-NEXT: bfi x11, x9, #2, #1 |
| ; CHECK-NEXT: add x9, x14, x10 |
| ; CHECK-NEXT: umov.b w10, v2[5] |
| ; CHECK-NEXT: st1.s { v0 }[1], [x11] |
| ; CHECK-NEXT: and x11, x12, #0x1 |
| ; CHECK-NEXT: orr x14, x8, x9, lsl #2 |
| ; CHECK-NEXT: and x12, x13, #0x1 |
| ; CHECK-NEXT: add x9, x9, x11 |
| ; CHECK-NEXT: umov.b w11, v2[6] |
| ; CHECK-NEXT: and x13, x15, #0x1 |
| ; CHECK-NEXT: add x12, x9, x12 |
| ; CHECK-NEXT: st1.s { v0 }[2], [x14] |
| ; CHECK-NEXT: str s3, [x8, x9, lsl #2] |
| ; CHECK-NEXT: and x9, x10, #0x1 |
| ; CHECK-NEXT: add x10, x12, x13 |
| ; CHECK-NEXT: and x12, x12, #0x7 |
| ; CHECK-NEXT: add x9, x10, x9 |
| ; CHECK-NEXT: and x10, x10, #0x7 |
| ; CHECK-NEXT: str s1, [x8, x12, lsl #2] |
| ; CHECK-NEXT: and x12, x9, #0x7 |
| ; CHECK-NEXT: mov s0, v1[3] |
| ; CHECK-NEXT: and w11, w11, #0x1 |
| ; CHECK-NEXT: add x10, x8, x10, lsl #2 |
| ; CHECK-NEXT: add x12, x8, x12, lsl #2 |
| ; CHECK-NEXT: add w9, w9, w11 |
| ; CHECK-NEXT: and x9, x9, #0x7 |
| ; CHECK-NEXT: st1.s { v1 }[1], [x10] |
| ; CHECK-NEXT: st1.s { v1 }[2], [x12] |
| ; CHECK-NEXT: str s0, [x8, x9, lsl #2] |
| ; CHECK-NEXT: ldp q0, q1, [sp], #32 |
| ; CHECK-NEXT: ret |
| %out = call <8 x i32> @llvm.experimental.vector.compress(<8 x i32> %vec, <8 x i1> %mask, <8 x i32> undef) |
| ret <8 x i32> %out |
| } |
| |
| define <4 x i32> @test_compress_all_const() { |
| ; CHECK-LABEL: test_compress_all_const: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: Lloh0: |
| ; CHECK-NEXT: adrp x8, lCPI5_0@PAGE |
| ; CHECK-NEXT: Lloh1: |
| ; CHECK-NEXT: ldr q0, [x8, lCPI5_0@PAGEOFF] |
| ; CHECK-NEXT: ret |
| ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> <i32 3, i32 5, i32 7, i32 9>, |
| <4 x i1> <i1 0, i1 1, i1 0, i1 1>, |
| <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| |
| define <4 x i32> @test_compress_const_mask(<4 x i32> %vec) { |
| ; CHECK-LABEL: test_compress_const_mask: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: mov.s v0[1], v0[3] |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> <i1 1, i1 undef, i1 0, i1 1>, <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| |
| define <4 x i32> @test_compress_const_mask_passthrough(<4 x i32> %vec, <4 x i32> %passthru) { |
| ; CHECK-LABEL: test_compress_const_mask_passthrough: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: mov.d v1[0], v0[1] |
| ; CHECK-NEXT: mov.s v1[0], v0[0] |
| ; CHECK-NEXT: mov.16b v0, v1 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> <i1 1, i1 undef, i1 0, i1 1>, <4 x i32> %passthru) |
| ret <4 x i32> %out |
| } |
| |
| define <4 x i32> @test_compress_const_mask_const_passthrough(<4 x i32> %vec) { |
| ; CHECK-LABEL: test_compress_const_mask_const_passthrough: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: mov.s v0[1], v0[3] |
| ; CHECK-NEXT: mov w8, #7 ; =0x7 |
| ; CHECK-NEXT: mov.s v0[2], w8 |
| ; CHECK-NEXT: mov w8, #8 ; =0x8 |
| ; CHECK-NEXT: mov.s v0[3], w8 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>) |
| ret <4 x i32> %out |
| } |
| |
; We pass a placeholder value as the first argument of the following splat/undef mask tests so that the
; input vector does not already live in the result register. This checks that these compresses are
; lowered to a no-op: either a single copy of the input (or passthru) vector register to the return
; register, or no code at all.
| define <4 x i32> @test_compress_const_splat1_mask(<4 x i32> %ignore, <4 x i32> %vec) { |
| ; CHECK-LABEL: test_compress_const_splat1_mask: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: mov.16b v0, v1 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 -1), <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| define <4 x i32> @test_compress_const_splat0_mask(<4 x i32> %ignore, <4 x i32> %vec) { |
| ; CHECK-LABEL: test_compress_const_splat0_mask: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 0), <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| define <4 x i32> @test_compress_undef_mask(<4 x i32> %ignore, <4 x i32> %vec) { |
| ; CHECK-LABEL: test_compress_undef_mask: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> undef, <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| define <4 x i32> @test_compress_const_splat0_mask_with_passthru(<4 x i32> %ignore, <4 x i32> %vec, <4 x i32> %passthru) { |
| ; CHECK-LABEL: test_compress_const_splat0_mask_with_passthru: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: mov.16b v0, v2 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 0), <4 x i32> %passthru) |
| ret <4 x i32> %out |
| } |
| define <4 x i32> @test_compress_const_splat0_mask_without_passthru(<4 x i32> %ignore, <4 x i32> %vec) { |
| ; CHECK-LABEL: test_compress_const_splat0_mask_without_passthru: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: ret |
| %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 0), <4 x i32> undef) |
| ret <4 x i32> %out |
| } |
| |
| define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_small: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: shl.4h v1, v1, #15 |
| ; CHECK-NEXT: add x8, sp, #8 |
| ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: str h0, [sp, #8] |
| ; CHECK-NEXT: cmlt.4h v1, v1, #0 |
| ; CHECK-NEXT: umov.h w9, v1[0] |
| ; CHECK-NEXT: umov.h w10, v1[1] |
| ; CHECK-NEXT: umov.h w11, v1[2] |
| ; CHECK-NEXT: bfi x8, x9, #1, #1 |
| ; CHECK-NEXT: and x10, x10, #0x1 |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: add x9, x9, x10 |
| ; CHECK-NEXT: and w11, w11, #0x1 |
| ; CHECK-NEXT: add x10, sp, #8 |
| ; CHECK-NEXT: add w11, w9, w11 |
| ; CHECK-NEXT: orr x9, x10, x9, lsl #1 |
| ; CHECK-NEXT: st1.h { v0 }[1], [x8] |
| ; CHECK-NEXT: bfi x10, x11, #1, #2 |
| ; CHECK-NEXT: st1.h { v0 }[2], [x9] |
| ; CHECK-NEXT: st1.h { v0 }[3], [x10] |
| ; CHECK-NEXT: ldr d0, [sp, #8] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i8> @llvm.experimental.vector.compress(<4 x i8> %vec, <4 x i1> %mask, <4 x i8> undef) |
| ret <4 x i8> %out |
| } |
| |
| define <4 x i4> @test_compress_illegal_element_type(<4 x i4> %vec, <4 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_illegal_element_type: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: shl.4h v1, v1, #15 |
| ; CHECK-NEXT: add x8, sp, #8 |
| ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: str h0, [sp, #8] |
| ; CHECK-NEXT: cmlt.4h v1, v1, #0 |
| ; CHECK-NEXT: umov.h w9, v1[0] |
| ; CHECK-NEXT: umov.h w10, v1[1] |
| ; CHECK-NEXT: umov.h w11, v1[2] |
| ; CHECK-NEXT: bfi x8, x9, #1, #1 |
| ; CHECK-NEXT: and x10, x10, #0x1 |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: add x9, x9, x10 |
| ; CHECK-NEXT: and w11, w11, #0x1 |
| ; CHECK-NEXT: add x10, sp, #8 |
| ; CHECK-NEXT: add w11, w9, w11 |
| ; CHECK-NEXT: orr x9, x10, x9, lsl #1 |
| ; CHECK-NEXT: st1.h { v0 }[1], [x8] |
| ; CHECK-NEXT: bfi x10, x11, #1, #2 |
| ; CHECK-NEXT: st1.h { v0 }[2], [x9] |
| ; CHECK-NEXT: st1.h { v0 }[3], [x10] |
| ; CHECK-NEXT: ldr d0, [sp, #8] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| %out = call <4 x i4> @llvm.experimental.vector.compress(<4 x i4> %vec, <4 x i1> %mask, <4 x i4> undef) |
| ret <4 x i4> %out |
| } |
| |
| define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_narrow: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: movi.2d v1, #0000000000000000 |
| ; CHECK-NEXT: mov x11, sp |
| ; CHECK-NEXT: str s0, [sp] |
| ; CHECK-NEXT: mov.h v1[0], w0 |
| ; CHECK-NEXT: mov.h v1[1], w1 |
| ; CHECK-NEXT: mov.h v1[2], w2 |
| ; CHECK-NEXT: ushll.4s v1, v1, #0 |
| ; CHECK-NEXT: shl.4s v1, v1, #31 |
| ; CHECK-NEXT: cmlt.4s v1, v1, #0 |
| ; CHECK-NEXT: mov.s w8, v1[1] |
| ; CHECK-NEXT: mov.s w9, v1[2] |
| ; CHECK-NEXT: fmov w10, s1 |
| ; CHECK-NEXT: bfi x11, x10, #2, #1 |
| ; CHECK-NEXT: and x10, x10, #0x1 |
| ; CHECK-NEXT: and x8, x8, #0x1 |
| ; CHECK-NEXT: and w9, w9, #0x1 |
| ; CHECK-NEXT: add x8, x10, x8 |
| ; CHECK-NEXT: mov x10, sp |
| ; CHECK-NEXT: st1.s { v0 }[1], [x11] |
| ; CHECK-NEXT: add w9, w8, w9 |
| ; CHECK-NEXT: orr x8, x10, x8, lsl #2 |
| ; CHECK-NEXT: bfi x10, x9, #2, #2 |
| ; CHECK-NEXT: st1.s { v0 }[2], [x8] |
| ; CHECK-NEXT: st1.s { v0 }[3], [x10] |
| ; CHECK-NEXT: ldr q0, [sp], #16 |
| ; CHECK-NEXT: ret |
| %out = call <3 x i32> @llvm.experimental.vector.compress(<3 x i32> %vec, <3 x i1> %mask, <3 x i32> undef) |
| ret <3 x i32> %out |
| } |
| |
| define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i1> %mask) { |
| ; CHECK-LABEL: test_compress_narrow_illegal_element_type: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-NEXT: movi.2d v0, #0000000000000000 |
| ; CHECK-NEXT: add x10, sp, #8 |
| ; CHECK-NEXT: strh w0, [sp, #8] |
| ; CHECK-NEXT: mov.h v0[0], w3 |
| ; CHECK-NEXT: mov.h v0[1], w4 |
| ; CHECK-NEXT: mov.h v0[2], w5 |
| ; CHECK-NEXT: shl.4h v0, v0, #15 |
| ; CHECK-NEXT: cmlt.4h v0, v0, #0 |
| ; CHECK-NEXT: umov.h w8, v0[0] |
| ; CHECK-NEXT: umov.h w9, v0[1] |
| ; CHECK-NEXT: and x9, x9, #0x1 |
| ; CHECK-NEXT: and x11, x8, #0x1 |
| ; CHECK-NEXT: bfi x10, x8, #1, #1 |
| ; CHECK-NEXT: add x8, x11, x9 |
| ; CHECK-NEXT: add x9, sp, #8 |
| ; CHECK-NEXT: orr x8, x9, x8, lsl #1 |
| ; CHECK-NEXT: strh w1, [x10] |
| ; CHECK-NEXT: strh w2, [x8] |
| ; CHECK-NEXT: ldr d0, [sp, #8] |
| ; CHECK-NEXT: umov.h w0, v0[0] |
| ; CHECK-NEXT: umov.h w1, v0[1] |
| ; CHECK-NEXT: umov.h w2, v0[2] |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: ret |
| %out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef) |
| ret <3 x i3> %out |
| } |