| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD |
| ; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI |
| |
| ; Basic tests from input vector to bitmask |
| ; IR generated from clang for: |
| ; __builtin_convertvector + reinterpret_cast<uint16&> |
| |
| ; CHECK-GI: warning: Instruction selection used fallback path for convert_to_bitmask2 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_2xi32 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_8xi2 |
| ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat |
| |
| ; Each i8 lane of the <16 x i8> input is compared against zero (icmp ne); the |
| ; resulting <16 x i1> is bitcast to an i16 and returned. |
| define i16 @convert_to_bitmask16(<16 x i8> %vec) { |
| ; Bits used in mask |
| ; CHECK-SD-LABEL: convert_to_bitmask16: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: adrp x8, lCPI0_0@PAGE |
| ; CHECK-SD-NEXT: cmeq.16b v0, v0, #0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] |
| ; CHECK-SD-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8 |
| ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.8h h0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask16: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmtst.16b v0, v0, v0 |
| ; CHECK-GI-NEXT: umov.b w8, v0[1] |
| ; CHECK-GI-NEXT: umov.b w9, v0[0] |
| ; CHECK-GI-NEXT: umov.b w10, v0[2] |
| ; CHECK-GI-NEXT: umov.b w11, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w9, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[4] |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[5] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[6] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[7] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[8] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[9] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[10] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #8 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[11] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #9 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[12] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #10 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[13] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #11 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[14] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #12 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[15] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #13 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #14 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #15 |
| ; CHECK-GI-NEXT: strh w8, [sp, #14] |
| ; CHECK-GI-NEXT: and w0, w8, #0xffff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| ; Actual conversion |
| |
| %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer |
| %bitmask = bitcast <16 x i1> %cmp_result to i16 |
| ret i16 %bitmask |
| } |
| |
| ; Each i16 lane of the <8 x i16> input is compared against zero (icmp ne); the |
| ; <8 x i1> result is bitcast to i8 and then zero-extended to the i16 return type. |
| define i16 @convert_to_bitmask8(<8 x i16> %vec) { |
| ; CHECK-SD-LABEL: convert_to_bitmask8: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: adrp x8, lCPI1_0@PAGE |
| ; CHECK-SD-NEXT: cmeq.8h v0, v0, #0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] |
| ; CHECK-SD-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-SD-NEXT: addv.8h h0, v0 |
| ; CHECK-SD-NEXT: fmov w8, s0 |
| ; CHECK-SD-NEXT: and w0, w8, #0xff |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask8: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmtst.8h v0, v0, v0 |
| ; CHECK-GI-NEXT: xtn.8b v0, v0 |
| ; CHECK-GI-NEXT: umov.b w8, v0[1] |
| ; CHECK-GI-NEXT: umov.b w9, v0[0] |
| ; CHECK-GI-NEXT: umov.b w10, v0[2] |
| ; CHECK-GI-NEXT: umov.b w11, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w9, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[4] |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[5] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[6] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[7] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer |
| %bitmask = bitcast <8 x i1> %cmp_result to i8 |
| %extended_bitmask = zext i8 %bitmask to i16 |
| ret i16 %extended_bitmask |
| } |
| |
| ; Each i32 lane of the <4 x i32> input is compared against zero (icmp ne); the |
| ; <4 x i1> result is bitcast to i4 and returned without extension. |
| define i4 @convert_to_bitmask4(<4 x i32> %vec) { |
| ; CHECK-SD-LABEL: convert_to_bitmask4: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: adrp x8, lCPI2_0@PAGE |
| ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] |
| ; CHECK-SD-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-SD-NEXT: addv.4s s0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask4: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmtst.4s v0, v0, v0 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer |
| %bitmask = bitcast <4 x i1> %cmp_result to i4 |
| ret i4 %bitmask |
| } |
| |
| ; Each i64 lane of the <2 x i64> input is compared against zero (icmp ne); the |
| ; <2 x i1> result is bitcast to i2 and zero-extended to the i8 return type. |
| ; Both RUN lines share one set of expectations here; this function is listed |
| ; among the expected GlobalISel fallback warnings at the top of the file. |
| define i8 @convert_to_bitmask2(<2 x i64> %vec) { |
| ; CHECK-LABEL: convert_to_bitmask2: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: adrp x8, lCPI3_0@PAGE |
| ; CHECK-NEXT: cmeq.2d v0, v0, #0 |
| ; CHECK-NEXT: ldr q1, [x8, lCPI3_0@PAGEOFF] |
| ; CHECK-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-NEXT: addp.2d d0, v0 |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: and w0, w8, #0x3 |
| ; CHECK-NEXT: ret |
| |
| |
| %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer |
| %bitmask = bitcast <2 x i1> %cmp_result to i2 |
| %extended_bitmask = zext i2 %bitmask to i8 |
| ret i8 %extended_bitmask |
| } |
| |
| ; Clang's __builtin_convertvector adds an undef vector concat for vectors with <8 elements. |
| ; The <4 x i1> compare result is widened to <8 x i1> by a shufflevector that keeps |
| ; lanes 0-3 and leaves lanes 4-7 undef, and the widened vector is bitcast to i8. |
| ; Both RUN lines share one set of expectations here; this function is listed |
| ; among the expected GlobalISel fallback warnings at the top of the file. |
| define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) { |
| ; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: adrp x8, lCPI4_0@PAGE |
| ; CHECK-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-NEXT: ldr q1, [x8, lCPI4_0@PAGEOFF] |
| ; CHECK-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-NEXT: addv.4s s0, v0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| |
| |
| %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer |
| %vector_pad = shufflevector <4 x i1> %cmp_result, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> |
| %bitmask = bitcast <8 x i1> %vector_pad to i8 |
| ret i8 %bitmask |
| } |
| |
| |
| ; No icmp is involved: the two <4 x i32> inputs are ANDed lane-wise, the result is |
| ; truncated to <4 x i1>, and that vector is bitcast to i4. |
| define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_no_compare: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: adrp x8, lCPI5_0@PAGE |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI5_0@PAGEOFF] |
| ; CHECK-SD-NEXT: shl.4s v0, v0, #31 |
| ; CHECK-SD-NEXT: cmlt.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4s s0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_no_compare: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: and.16b v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp = and <4 x i32> %vec1, %vec2 |
| %trunc = trunc <4 x i32> %cmp to <4 x i1> |
| %bitmask = bitcast <4 x i1> %trunc to i4 |
| ret i4 %bitmask |
| } |
| |
| ; Two compares on %vec1 (ne zero, eq %vec2) are combined with an AND on <4 x i1> |
| ; before the result is bitcast to i4. |
| define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec2) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_with_compare_chain: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: cmeq.4s v2, v0, #0 |
| ; CHECK-SD-NEXT: cmeq.4s v0, v0, v1 |
| ; CHECK-SD-NEXT: adrp x8, lCPI6_0@PAGE |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI6_0@PAGEOFF] |
| ; CHECK-SD-NEXT: bic.16b v0, v0, v2 |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4s s0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_with_compare_chain: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmeq.4s v2, v0, #0 |
| ; CHECK-GI-NEXT: cmeq.4s v0, v0, v1 |
| ; CHECK-GI-NEXT: bic.16b v0, v0, v2 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer |
| %cmp2 = icmp eq <4 x i32> %vec1, %vec2 |
| %cmp3 = and <4 x i1> %cmp1, %cmp2 |
| %bitmask = bitcast <4 x i1> %cmp3 to i4 |
| ret i4 %bitmask |
| } |
| |
| ; Mixes a compare with a truncation: (%vec1 != 0) is ANDed with %vec2 truncated to |
| ; <4 x i1>, and the combined <4 x i1> is bitcast to i4. |
| define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %vec2) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_with_trunc_in_chain: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: adrp x8, lCPI7_0@PAGE |
| ; CHECK-SD-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI7_0@PAGEOFF] |
| ; CHECK-SD-NEXT: shl.4s v0, v0, #31 |
| ; CHECK-SD-NEXT: cmlt.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4s s0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_with_trunc_in_chain: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-GI-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer |
| %trunc_vec = trunc <4 x i32> %vec2 to <4 x i1> |
| %and_res = and <4 x i1> %cmp1, %trunc_vec |
| %bitmask = bitcast <4 x i1> %and_res to i4 |
| ret i4 %bitmask |
| } |
| |
| ; Two compares against zero (ne on %vec1, eq on %vec2) are ANDed, then passed |
| ; through a sequence of or/xor/and operations with constant <4 x i1> vectors |
| ; before the final <4 x i1> is bitcast to i4. |
| define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <4 x i32> %vec2) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0 |
| ; CHECK-SD-NEXT: adrp x8, lCPI8_0@PAGE |
| ; CHECK-SD-NEXT: movi d2, #0x000000ffffffff |
| ; CHECK-SD-NEXT: movi d3, #0x00ffffffffffff |
| ; CHECK-SD-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-SD-NEXT: movi d1, #0xffff0000ffff0000 |
| ; CHECK-SD-NEXT: xtn.4h v0, v0 |
| ; CHECK-SD-NEXT: orr.8b v0, v0, v2 |
| ; CHECK-SD-NEXT: movi d2, #0x00ffffffff0000 |
| ; CHECK-SD-NEXT: eor.8b v1, v0, v1 |
| ; CHECK-SD-NEXT: eor.8b v0, v0, v2 |
| ; CHECK-SD-NEXT: mov.h v1[2], wzr |
| ; CHECK-SD-NEXT: orr.8b v0, v0, v3 |
| ; CHECK-SD-NEXT: orr.8b v0, v1, v0 |
| ; CHECK-SD-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF] |
| ; CHECK-SD-NEXT: shl.4h v0, v0, #15 |
| ; CHECK-SD-NEXT: cmlt.4h v0, v0, #0 |
| ; CHECK-SD-NEXT: and.8b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4h h0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mov w8, #1 ; =0x1 |
| ; CHECK-GI-NEXT: mov w9, #0 ; =0x0 |
| ; CHECK-GI-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-GI-NEXT: fmov s2, w8 |
| ; CHECK-GI-NEXT: fmov s4, w9 |
| ; CHECK-GI-NEXT: cmeq.4s v1, v1, #0 |
| ; CHECK-GI-NEXT: mov.16b v3, v2 |
| ; CHECK-GI-NEXT: mov.16b v5, v4 |
| ; CHECK-GI-NEXT: mov.h v4[1], w8 |
| ; CHECK-GI-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-GI-NEXT: mov.16b v1, v2 |
| ; CHECK-GI-NEXT: mov.h v2[1], w8 |
| ; CHECK-GI-NEXT: mov.h v3[1], w8 |
| ; CHECK-GI-NEXT: mov.h v5[1], w8 |
| ; CHECK-GI-NEXT: mov.h v1[1], w8 |
| ; CHECK-GI-NEXT: mov.h v4[2], w8 |
| ; CHECK-GI-NEXT: xtn.4h v0, v0 |
| ; CHECK-GI-NEXT: mov.h v2[2], w8 |
| ; CHECK-GI-NEXT: mov.h v3[2], w9 |
| ; CHECK-GI-NEXT: mov.h v5[2], w9 |
| ; CHECK-GI-NEXT: mov.h v1[2], w9 |
| ; CHECK-GI-NEXT: mov.h v4[3], w9 |
| ; CHECK-GI-NEXT: mov.h v2[3], w9 |
| ; CHECK-GI-NEXT: mov.h v3[3], w9 |
| ; CHECK-GI-NEXT: mov.h v5[3], w8 |
| ; CHECK-GI-NEXT: mov.h v1[3], w8 |
| ; CHECK-GI-NEXT: orr.8b v0, v0, v3 |
| ; CHECK-GI-NEXT: eor.8b v3, v0, v5 |
| ; CHECK-GI-NEXT: eor.8b v0, v4, v0 |
| ; CHECK-GI-NEXT: and.8b v1, v3, v1 |
| ; CHECK-GI-NEXT: orr.8b v0, v2, v0 |
| ; CHECK-GI-NEXT: orr.8b v0, v1, v0 |
| ; CHECK-GI-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer |
| %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer |
| |
| ; Artificially make this a long chain to hide the original type |
| %chain1 = and <4 x i1> %cmp1, %cmp2; |
| %chain2 = or <4 x i1> %chain1, <i1 1, i1 1, i1 0, i1 0>; |
| %chain3 = xor <4 x i1> %chain2, <i1 0, i1 1, i1 0, i1 1>; |
| %chain4 = and <4 x i1> %chain3, <i1 1, i1 1, i1 0, i1 1>; |
| %chain5 = or <4 x i1> %chain4, <i1 1, i1 1, i1 1, i1 0>; |
| %chain6 = xor <4 x i1> <i1 0, i1 1, i1 1, i1 0>, %chain2; |
| %chain7 = or <4 x i1> %chain5, %chain6; |
| %bitmask = bitcast <4 x i1> %chain7 to i4 |
| ret i4 %bitmask |
| } |
| |
| ; The two compares operate on different element widths: %vec1 is <4 x i16> (ne zero) |
| ; and %vec2 is <4 x i32> (eq zero). Their <4 x i1> results are ORed and bitcast to i4. |
| define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4 x i32> %vec2) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_with_different_types_in_chain: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0 |
| ; CHECK-SD-NEXT: cmeq.4h v0, v0, #0 |
| ; CHECK-SD-NEXT: adrp x8, lCPI9_0@PAGE |
| ; CHECK-SD-NEXT: xtn.4h v1, v1 |
| ; CHECK-SD-NEXT: orn.8b v0, v1, v0 |
| ; CHECK-SD-NEXT: ldr d1, [x8, lCPI9_0@PAGEOFF] |
| ; CHECK-SD-NEXT: and.8b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4h h0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_with_different_types_in_chain: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmeq.4s v1, v1, #0 |
| ; CHECK-GI-NEXT: cmeq.4h v0, v0, #0 |
| ; CHECK-GI-NEXT: xtn.4h v1, v1 |
| ; CHECK-GI-NEXT: orn.8b v0, v1, v0 |
| ; CHECK-GI-NEXT: ushll.4s v0, v0, #0 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer |
| %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer |
| %chain1 = or <4 x i1> %cmp1, %cmp2 |
| %bitmask = bitcast <4 x i1> %chain1 to i4 |
| ret i4 %bitmask |
| } |
| |
| ; The function receives a <16 x i1> argument directly and only bitcasts it to i16; |
| ; there is no compare in the body to indicate a wider source element type. |
| define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_without_knowing_type: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: shl.16b v0, v0, #7 |
| ; CHECK-SD-NEXT: adrp x8, lCPI10_0@PAGE |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI10_0@PAGEOFF] |
| ; CHECK-SD-NEXT: cmlt.16b v0, v0, #0 |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8 |
| ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.8h h0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_without_knowing_type: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: umov.b w8, v0[1] |
| ; CHECK-GI-NEXT: umov.b w9, v0[0] |
| ; CHECK-GI-NEXT: umov.b w10, v0[2] |
| ; CHECK-GI-NEXT: umov.b w11, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w9, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[4] |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[5] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[6] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[7] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[8] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[9] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[10] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #8 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[11] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #9 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[12] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #10 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[13] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #11 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[14] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #12 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[15] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #13 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #14 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #15 |
| ; CHECK-GI-NEXT: strh w8, [sp, #14] |
| ; CHECK-GI-NEXT: and w0, w8, #0xffff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| %bitmask = bitcast <16 x i1> %vec to i16 |
| ret i16 %bitmask |
| } |
| |
| ; 64-bit vector input: each i32 lane of <2 x i32> is compared against zero (icmp ne) |
| ; and the <2 x i1> result is bitcast to i2. |
| ; Both RUN lines share one set of expectations here; this function is listed |
| ; among the expected GlobalISel fallback warnings at the top of the file. |
| define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) { |
| ; CHECK-LABEL: convert_to_bitmask_2xi32: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: adrp x8, lCPI11_0@PAGE |
| ; CHECK-NEXT: cmeq.2s v0, v0, #0 |
| ; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF] |
| ; CHECK-NEXT: bic.8b v0, v1, v0 |
| ; CHECK-NEXT: addp.2s v0, v0, v0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| |
| %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer |
| %bitmask = bitcast <2 x i1> %cmp_result to i2 |
| ret i2 %bitmask |
| } |
| |
| ; 32-bit vector input: each i8 lane of <4 x i8> is compared against zero (icmp ne) |
| ; and the <4 x i1> result is bitcast to i4. |
| define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_4xi8: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8 |
| ; CHECK-SD-NEXT: adrp x8, lCPI12_0@PAGE |
| ; CHECK-SD-NEXT: ldr d1, [x8, lCPI12_0@PAGEOFF] |
| ; CHECK-SD-NEXT: cmeq.4h v0, v0, #0 |
| ; CHECK-SD-NEXT: bic.8b v0, v1, v0 |
| ; CHECK-SD-NEXT: addv.4h h0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_4xi8: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mov w8, #0 ; =0x0 |
| ; CHECK-GI-NEXT: uzp1.8b v0, v0, v0 |
| ; CHECK-GI-NEXT: fmov s1, w8 |
| ; CHECK-GI-NEXT: mov.b v1[1], w8 |
| ; CHECK-GI-NEXT: mov.b v1[2], w8 |
| ; CHECK-GI-NEXT: mov.b v1[3], w8 |
| ; CHECK-GI-NEXT: cmeq.8b v0, v0, v1 |
| ; CHECK-GI-NEXT: mvn.8b v0, v0 |
| ; CHECK-GI-NEXT: umov.b w8, v0[0] |
| ; CHECK-GI-NEXT: umov.b w9, v0[1] |
| ; CHECK-GI-NEXT: mov.s v1[0], w8 |
| ; CHECK-GI-NEXT: umov.b w8, v0[2] |
| ; CHECK-GI-NEXT: mov.s v1[1], w9 |
| ; CHECK-GI-NEXT: umov.b w9, v0[3] |
| ; CHECK-GI-NEXT: mov.s v1[2], w8 |
| ; CHECK-GI-NEXT: mov.s v1[3], w9 |
| ; CHECK-GI-NEXT: mov.s w8, v1[1] |
| ; CHECK-GI-NEXT: mov.s w9, v1[2] |
| ; CHECK-GI-NEXT: fmov w11, s1 |
| ; CHECK-GI-NEXT: mov.s w10, v1[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| %cmp_result = icmp ne <4 x i8> %vec, zeroinitializer |
| %bitmask = bitcast <4 x i1> %cmp_result to i4 |
| ret i4 %bitmask |
| } |
| |
| ; Sub-byte element type: each i2 lane of <8 x i2> is compared against zero (icmp ne) |
| ; and the <8 x i1> result is bitcast to i8. |
| ; Both RUN lines share one set of expectations here; this function is listed |
| ; among the expected GlobalISel fallback warnings at the top of the file. |
| define i8 @convert_to_bitmask_8xi2(<8 x i2> %vec) { |
| ; CHECK-LABEL: convert_to_bitmask_8xi2: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: movi.8b v1, #3 |
| ; CHECK-NEXT: adrp x8, lCPI13_0@PAGE |
| ; CHECK-NEXT: and.8b v0, v0, v1 |
| ; CHECK-NEXT: ldr d1, [x8, lCPI13_0@PAGEOFF] |
| ; CHECK-NEXT: cmeq.8b v0, v0, #0 |
| ; CHECK-NEXT: bic.8b v0, v1, v0 |
| ; CHECK-NEXT: addv.8b b0, v0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| |
| %cmp_result = icmp ne <8 x i2> %vec, zeroinitializer |
| %bitmask = bitcast <8 x i1> %cmp_result to i8 |
| ret i8 %bitmask |
| } |
| |
| ; Floating-point variant: each lane of <4 x float> is compared against zero with |
| ; fcmp one (ordered and not equal), and the <4 x i1> result is bitcast to i4. |
| define i4 @convert_to_bitmask_float(<4 x float> %vec) { |
| ; CHECK-SD-LABEL: convert_to_bitmask_float: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: fcmgt.4s v1, v0, #0.0 |
| ; CHECK-SD-NEXT: fcmlt.4s v0, v0, #0.0 |
| ; CHECK-SD-NEXT: adrp x8, lCPI14_0@PAGE |
| ; CHECK-SD-NEXT: orr.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI14_0@PAGEOFF] |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4s s0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_to_bitmask_float: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: fcmgt.4s v1, v0, #0.0 |
| ; CHECK-GI-NEXT: fcmlt.4s v0, v0, #0.0 |
| ; CHECK-GI-NEXT: orr.16b v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp_result = fcmp one <4 x float> %vec, zeroinitializer |
| %bitmask = bitcast <4 x i1> %cmp_result to i4 |
| ret i4 %bitmask |
| } |
| |
| ; Larger vector types don't map directly, but they can be split/truncated and then converted. |
| ; After the comparison against 0, this is truncated to <8 x i16>, which is valid again. |
| ; Input is <8 x i32> (256 bits): each lane is compared against zero (icmp ne) and |
| ; the <8 x i1> result is bitcast to i8. |
| define i8 @convert_large_vector(<8 x i32> %vec) { |
| ; CHECK-SD-LABEL: convert_large_vector: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0 |
| ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: adrp x8, lCPI15_0@PAGE |
| ; CHECK-SD-NEXT: uzp1.8h v0, v0, v1 |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI15_0@PAGEOFF] |
| ; CHECK-SD-NEXT: bic.16b v0, v1, v0 |
| ; CHECK-SD-NEXT: addv.8h h0, v0 |
| ; CHECK-SD-NEXT: fmov w8, s0 |
| ; CHECK-SD-NEXT: and w0, w8, #0xff |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_large_vector: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: cmtst.4s v0, v0, v0 |
| ; CHECK-GI-NEXT: cmtst.4s v1, v1, v1 |
| ; CHECK-GI-NEXT: uzp1.8h v0, v0, v1 |
| ; CHECK-GI-NEXT: xtn.8b v0, v0 |
| ; CHECK-GI-NEXT: umov.b w8, v0[1] |
| ; CHECK-GI-NEXT: umov.b w9, v0[0] |
| ; CHECK-GI-NEXT: umov.b w10, v0[2] |
| ; CHECK-GI-NEXT: umov.b w11, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w9, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[4] |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[5] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w10, v0[6] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[7] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #6 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #7 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| |
| %cmp_result = icmp ne <8 x i32> %vec, zeroinitializer |
| %bitmask = bitcast <8 x i1> %cmp_result to i8 |
| ret i8 %bitmask |
| } |
| |
| ; Non-power-of-two element width: each i22 lane of <4 x i22> is compared against |
| ; zero (icmp ne) and the <4 x i1> result is bitcast to i4. |
| define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) { |
| ; CHECK-SD-LABEL: convert_legalized_illegal_element_size: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: movi.4s v1, #63, msl #16 |
| ; CHECK-SD-NEXT: adrp x8, lCPI16_0@PAGE |
| ; CHECK-SD-NEXT: cmtst.4s v0, v0, v1 |
| ; CHECK-SD-NEXT: ldr d1, [x8, lCPI16_0@PAGEOFF] |
| ; CHECK-SD-NEXT: xtn.4h v0, v0 |
| ; CHECK-SD-NEXT: and.8b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.4h h0, v0 |
| ; CHECK-SD-NEXT: fmov w0, s0 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: convert_legalized_illegal_element_size: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: movi.4s v1, #63, msl #16 |
| ; CHECK-GI-NEXT: cmtst.4s v0, v0, v1 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: fmov w11, s0 |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w11, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w9, #0x1 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| %cmp_result = icmp ne <4 x i22> %vec, zeroinitializer |
| %bitmask = bitcast <4 x i1> %cmp_result to i4 |
| ret i4 %bitmask |
| } |
| |
| ; This may still be converted as a v8i8 after the vector concat (but not as v4iX). |
| ; The shufflevector mask selects indices 4-7 (the lanes of %cmp_result, the second |
| ; operand) into result lanes 4-7 and leaves result lanes 0-3 undef, so the compare |
| ; result occupies the upper half of the <8 x i1> that is bitcast to i8. |
| ; Both RUN lines share one set of expectations here; this function is listed |
| ; among the expected GlobalISel fallback warnings at the top of the file. |
| define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) { |
| ; CHECK-LABEL: no_direct_convert_for_bad_concat: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: cmtst.4s v0, v0, v0 |
| ; CHECK-NEXT: adrp x8, lCPI17_0@PAGE |
| ; CHECK-NEXT: xtn.4h v0, v0 |
| ; CHECK-NEXT: umov.h w9, v0[0] |
| ; CHECK-NEXT: mov.b v1[4], w9 |
| ; CHECK-NEXT: umov.h w9, v0[1] |
| ; CHECK-NEXT: mov.b v1[5], w9 |
| ; CHECK-NEXT: umov.h w9, v0[2] |
| ; CHECK-NEXT: mov.b v1[6], w9 |
| ; CHECK-NEXT: umov.h w9, v0[3] |
| ; CHECK-NEXT: mov.b v1[7], w9 |
| ; CHECK-NEXT: shl.8b v0, v1, #7 |
| ; CHECK-NEXT: ldr d1, [x8, lCPI17_0@PAGEOFF] |
| ; CHECK-NEXT: cmlt.8b v0, v0, #0 |
| ; CHECK-NEXT: and.8b v0, v0, v1 |
| ; CHECK-NEXT: addv.8b b0, v0 |
| ; CHECK-NEXT: fmov w0, s0 |
| ; CHECK-NEXT: ret |
| |
| %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer |
| %vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7> |
| %bitmask = bitcast <8 x i1> %vector_pad to i8 |
| ret i8 %bitmask |
| } |
| |
| ; The <8 x i1> compare result is returned as a vector; there is no bitcast to a |
| ; scalar integer in this function. Both RUN lines share one set of expectations. |
| define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) { |
| ; CHECK-LABEL: no_convert_without_direct_bitcast: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: cmtst.8h v0, v0, v0 |
| ; CHECK-NEXT: xtn.8b v0, v0 |
| ; CHECK-NEXT: ret |
| |
| %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer |
| ret <8 x i1> %cmp_result |
| } |
| |
| ; Negative test: a <6 x i32> compare is bitcast from <6 x i1> to i6. Judging by |
| ; the function name, six lanes is a count the bitmask combine is not meant to |
| ; handle. Both instruction selectors are expected to extract the lanes one at a |
| ; time, assemble the bits with and/bfi/orr, and mask the result with 0x3f. |
| ; The assertions below are autogenerated (see the NOTE at the top of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) { |
| ; CHECK-SD-LABEL: no_combine_illegal_num_elements: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-SD-NEXT: fmov s0, w0 |
| ; CHECK-SD-NEXT: fmov s1, w4 |
| ; CHECK-SD-NEXT: mov.s v0[1], w1 |
| ; CHECK-SD-NEXT: mov.s v1[1], w5 |
| ; CHECK-SD-NEXT: mov.s v0[2], w2 |
| ; CHECK-SD-NEXT: cmeq.4s v1, v1, #0 |
| ; CHECK-SD-NEXT: mov.s v0[3], w3 |
| ; CHECK-SD-NEXT: cmeq.4s v0, v0, #0 |
| ; CHECK-SD-NEXT: uzp1.8h v0, v0, v1 |
| ; CHECK-SD-NEXT: mvn.16b v0, v0 |
| ; CHECK-SD-NEXT: xtn.8b v0, v0 |
| ; CHECK-SD-NEXT: umov.b w8, v0[0] |
| ; CHECK-SD-NEXT: umov.b w9, v0[1] |
| ; CHECK-SD-NEXT: umov.b w10, v0[2] |
| ; CHECK-SD-NEXT: and w8, w8, #0x1 |
| ; CHECK-SD-NEXT: bfi w8, w9, #1, #1 |
| ; CHECK-SD-NEXT: umov.b w9, v0[3] |
| ; CHECK-SD-NEXT: bfi w8, w10, #2, #1 |
| ; CHECK-SD-NEXT: umov.b w10, v0[4] |
| ; CHECK-SD-NEXT: bfi w8, w9, #3, #1 |
| ; CHECK-SD-NEXT: umov.b w9, v0[5] |
| ; CHECK-SD-NEXT: bfi w8, w10, #4, #1 |
| ; CHECK-SD-NEXT: orr w8, w8, w9, lsl #5 |
| ; CHECK-SD-NEXT: and w0, w8, #0x3f |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: no_combine_illegal_num_elements: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 |
| ; CHECK-GI-NEXT: mov.s v0[0], w0 |
| ; CHECK-GI-NEXT: mov.s v1[0], w4 |
| ; CHECK-GI-NEXT: mov.s v2[0], wzr |
| ; CHECK-GI-NEXT: mov.s v0[1], w1 |
| ; CHECK-GI-NEXT: mov.s v1[1], w5 |
| ; CHECK-GI-NEXT: mov.s v2[1], wzr |
| ; CHECK-GI-NEXT: mov.s v0[2], w2 |
| ; CHECK-GI-NEXT: cmeq.4s v1, v1, v2 |
| ; CHECK-GI-NEXT: mvn.16b v1, v1 |
| ; CHECK-GI-NEXT: mov.s v0[3], w3 |
| ; CHECK-GI-NEXT: cmtst.4s v0, v0, v0 |
| ; CHECK-GI-NEXT: mov.s w8, v0[1] |
| ; CHECK-GI-NEXT: mov.s w9, v0[2] |
| ; CHECK-GI-NEXT: mov.s w10, v0[3] |
| ; CHECK-GI-NEXT: mov.h v0[1], w8 |
| ; CHECK-GI-NEXT: mov.s w8, v1[1] |
| ; CHECK-GI-NEXT: mov.h v0[2], w9 |
| ; CHECK-GI-NEXT: mov.h v0[3], w10 |
| ; CHECK-GI-NEXT: mov.h v0[4], v1[0] |
| ; CHECK-GI-NEXT: mov.h v0[5], w8 |
| ; CHECK-GI-NEXT: umov.h w8, v0[1] |
| ; CHECK-GI-NEXT: umov.h w9, v0[0] |
| ; CHECK-GI-NEXT: umov.h w10, v0[2] |
| ; CHECK-GI-NEXT: umov.h w11, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: bfi w9, w8, #1, #31 |
| ; CHECK-GI-NEXT: and w8, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.h w10, v0[4] |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.h w11, v0[5] |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 |
| ; CHECK-GI-NEXT: and w9, w10, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #4 |
| ; CHECK-GI-NEXT: and w9, w11, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #5 |
| ; CHECK-GI-NEXT: and w8, w8, #0x3f |
| ; CHECK-GI-NEXT: strb w8, [sp, #15] |
| ; CHECK-GI-NEXT: and w0, w8, #0xff |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| |
| ; Lane-wise "!= 0", then reinterpret the six i1 lanes as a single i6 value. |
| %cmp_result = icmp ne <6 x i32> %vec, zeroinitializer |
| %bitmask = bitcast <6 x i1> %cmp_result to i6 |
| ret i6 %bitmask |
| } |
| |
| ; Only apply the combine when casting a vector to a scalar. |
| ; Here <16 x i1> is bitcast to another vector type (<2 x i8>), not to an integer. |
| ; The expected SelectionDAG output still reduces to a 16-bit value (addv.8h), |
| ; stores it to the stack and reloads its two bytes into lanes 0 and 4 with ld1.b. |
| ; The expected GlobalISel output builds each byte bit by bit with |
| ; umov.b/and/bfi/orr and writes the bytes to stack slots [sp, #8] .. [sp, #15]. |
| ; The function is nounwind, so no .cfi directives are expected. |
| ; The assertions below are autogenerated (see the NOTE at the top of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind { |
| ; CHECK-SD-LABEL: vector_to_vector_cast: |
| ; CHECK-SD: ; %bb.0: |
| ; CHECK-SD-NEXT: sub sp, sp, #16 |
| ; CHECK-SD-NEXT: shl.16b v0, v0, #7 |
| ; CHECK-SD-NEXT: adrp x8, lCPI20_0@PAGE |
| ; CHECK-SD-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF] |
| ; CHECK-SD-NEXT: add x8, sp, #14 |
| ; CHECK-SD-NEXT: cmlt.16b v0, v0, #0 |
| ; CHECK-SD-NEXT: and.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: ext.16b v1, v0, v0, #8 |
| ; CHECK-SD-NEXT: zip1.16b v0, v0, v1 |
| ; CHECK-SD-NEXT: addv.8h h0, v0 |
| ; CHECK-SD-NEXT: str h0, [sp, #14] |
| ; CHECK-SD-NEXT: ld1.b { v0 }[0], [x8] |
| ; CHECK-SD-NEXT: orr x8, x8, #0x1 |
| ; CHECK-SD-NEXT: ld1.b { v0 }[4], [x8] |
| ; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-SD-NEXT: add sp, sp, #16 |
| ; CHECK-SD-NEXT: ret |
| ; |
| ; CHECK-GI-LABEL: vector_to_vector_cast: |
| ; CHECK-GI: ; %bb.0: |
| ; CHECK-GI-NEXT: sub sp, sp, #16 |
| ; CHECK-GI-NEXT: umov.b w8, v0[1] |
| ; CHECK-GI-NEXT: mov d1, v0[1] |
| ; CHECK-GI-NEXT: umov.b w10, v0[1] |
| ; CHECK-GI-NEXT: umov.b w9, v0[0] |
| ; CHECK-GI-NEXT: umov.b w13, v0[0] |
| ; CHECK-GI-NEXT: umov.b w14, v0[2] |
| ; CHECK-GI-NEXT: umov.b w15, v0[3] |
| ; CHECK-GI-NEXT: umov.b w11, v0[2] |
| ; CHECK-GI-NEXT: umov.b w16, v0[4] |
| ; CHECK-GI-NEXT: umov.b w17, v0[5] |
| ; CHECK-GI-NEXT: umov.b w12, v0[3] |
| ; CHECK-GI-NEXT: and w8, w8, #0x1 |
| ; CHECK-GI-NEXT: and w10, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w0, v1[1] |
| ; CHECK-GI-NEXT: bfi w9, w8, #1, #31 |
| ; CHECK-GI-NEXT: bfi w13, w10, #1, #31 |
| ; CHECK-GI-NEXT: and w14, w14, #0x1 |
| ; CHECK-GI-NEXT: umov.b w8, v1[0] |
| ; CHECK-GI-NEXT: umov.b w10, v1[2] |
| ; CHECK-GI-NEXT: and w15, w15, #0x1 |
| ; CHECK-GI-NEXT: orr w13, w13, w14, lsl #2 |
| ; CHECK-GI-NEXT: umov.b w14, v1[3] |
| ; CHECK-GI-NEXT: and w11, w11, #0x1 |
| ; CHECK-GI-NEXT: and w0, w0, #0x1 |
| ; CHECK-GI-NEXT: and w16, w16, #0x1 |
| ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #2 |
| ; CHECK-GI-NEXT: orr w13, w13, w15, lsl #3 |
| ; CHECK-GI-NEXT: umov.b w15, v1[4] |
| ; CHECK-GI-NEXT: umov.b w11, v0[6] |
| ; CHECK-GI-NEXT: bfi w8, w0, #1, #31 |
| ; CHECK-GI-NEXT: and w10, w10, #0x1 |
| ; CHECK-GI-NEXT: and w17, w17, #0x1 |
| ; CHECK-GI-NEXT: orr w13, w13, w16, lsl #4 |
| ; CHECK-GI-NEXT: and w14, w14, #0x1 |
| ; CHECK-GI-NEXT: umov.b w0, v0[7] |
| ; CHECK-GI-NEXT: orr w8, w8, w10, lsl #2 |
| ; CHECK-GI-NEXT: umov.b w10, v1[5] |
| ; CHECK-GI-NEXT: umov.b w16, v1[6] |
| ; CHECK-GI-NEXT: orr w13, w13, w17, lsl #5 |
| ; CHECK-GI-NEXT: umov.b w17, v0[4] |
| ; CHECK-GI-NEXT: and w15, w15, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w14, lsl #3 |
| ; CHECK-GI-NEXT: and w12, w12, #0x1 |
| ; CHECK-GI-NEXT: and w11, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w14, v1[7] |
| ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #3 |
| ; CHECK-GI-NEXT: orr w11, w13, w11, lsl #6 |
| ; CHECK-GI-NEXT: orr w8, w8, w15, lsl #4 |
| ; CHECK-GI-NEXT: umov.b w15, v0[5] |
| ; CHECK-GI-NEXT: and w10, w10, #0x1 |
| ; CHECK-GI-NEXT: and w0, w0, #0x1 |
| ; CHECK-GI-NEXT: and w12, w17, #0x1 |
| ; CHECK-GI-NEXT: umov.b w13, v0[1] |
| ; CHECK-GI-NEXT: orr w8, w8, w10, lsl #5 |
| ; CHECK-GI-NEXT: and w16, w16, #0x1 |
| ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #4 |
| ; CHECK-GI-NEXT: umov.b w10, v0[0] |
| ; CHECK-GI-NEXT: orr w11, w11, w0, lsl #7 |
| ; CHECK-GI-NEXT: and w14, w14, #0x1 |
| ; CHECK-GI-NEXT: and w12, w15, #0x1 |
| ; CHECK-GI-NEXT: umov.b w15, v0[2] |
| ; CHECK-GI-NEXT: orr w8, w8, w16, lsl #6 |
| ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #5 |
| ; CHECK-GI-NEXT: umov.b w12, v0[6] |
| ; CHECK-GI-NEXT: strb w11, [sp, #8] |
| ; CHECK-GI-NEXT: and w11, w13, #0x1 |
| ; CHECK-GI-NEXT: umov.b w13, v0[3] |
| ; CHECK-GI-NEXT: orr w8, w8, w14, lsl #7 |
| ; CHECK-GI-NEXT: umov.b w14, v0[7] |
| ; CHECK-GI-NEXT: ldr b0, [sp, #8] |
| ; CHECK-GI-NEXT: bfi w10, w11, #1, #31 |
| ; CHECK-GI-NEXT: and w11, w15, #0x1 |
| ; CHECK-GI-NEXT: strb w8, [sp, #9] |
| ; CHECK-GI-NEXT: umov.b w15, v0[4] |
| ; CHECK-GI-NEXT: and w8, w12, #0x1 |
| ; CHECK-GI-NEXT: orr w10, w10, w11, lsl #2 |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #6 |
| ; CHECK-GI-NEXT: and w9, w13, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[1] |
| ; CHECK-GI-NEXT: orr w9, w10, w9, lsl #3 |
| ; CHECK-GI-NEXT: umov.b w10, v0[5] |
| ; CHECK-GI-NEXT: umov.b w12, v0[0] |
| ; CHECK-GI-NEXT: and w13, w14, #0x1 |
| ; CHECK-GI-NEXT: umov.b w16, v0[2] |
| ; CHECK-GI-NEXT: umov.b w17, v0[3] |
| ; CHECK-GI-NEXT: and w14, w15, #0x1 |
| ; CHECK-GI-NEXT: umov.b w15, v0[2] |
| ; CHECK-GI-NEXT: orr w8, w8, w13, lsl #7 |
| ; CHECK-GI-NEXT: orr w9, w9, w14, lsl #4 |
| ; CHECK-GI-NEXT: umov.b w13, v0[6] |
| ; CHECK-GI-NEXT: and w11, w11, #0x1 |
| ; CHECK-GI-NEXT: umov.b w14, v0[3] |
| ; CHECK-GI-NEXT: strb w8, [sp, #10] |
| ; CHECK-GI-NEXT: and w8, w10, #0x1 |
| ; CHECK-GI-NEXT: bfi w12, w11, #1, #31 |
| ; CHECK-GI-NEXT: orr w8, w9, w8, lsl #5 |
| ; CHECK-GI-NEXT: umov.b w10, v0[4] |
| ; CHECK-GI-NEXT: and w9, w15, #0x1 |
| ; CHECK-GI-NEXT: umov.b w11, v0[7] |
| ; CHECK-GI-NEXT: umov.b w15, v0[1] |
| ; CHECK-GI-NEXT: orr w9, w12, w9, lsl #2 |
| ; CHECK-GI-NEXT: umov.b w12, v0[5] |
| ; CHECK-GI-NEXT: and w13, w13, #0x1 |
| ; CHECK-GI-NEXT: and w14, w14, #0x1 |
| ; CHECK-GI-NEXT: orr w8, w8, w13, lsl #6 |
| ; CHECK-GI-NEXT: umov.b w13, v0[0] |
| ; CHECK-GI-NEXT: orr w9, w9, w14, lsl #3 |
| ; CHECK-GI-NEXT: and w10, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w14, v0[6] |
| ; CHECK-GI-NEXT: and w11, w11, #0x1 |
| ; CHECK-GI-NEXT: and w15, w15, #0x1 |
| ; CHECK-GI-NEXT: umov.b w0, v0[3] |
| ; CHECK-GI-NEXT: orr w9, w9, w10, lsl #4 |
| ; CHECK-GI-NEXT: and w10, w12, #0x1 |
| ; CHECK-GI-NEXT: umov.b w12, v0[7] |
| ; CHECK-GI-NEXT: orr w8, w8, w11, lsl #7 |
| ; CHECK-GI-NEXT: bfi w13, w15, #1, #31 |
| ; CHECK-GI-NEXT: and w11, w16, #0x1 |
| ; CHECK-GI-NEXT: orr w9, w9, w10, lsl #5 |
| ; CHECK-GI-NEXT: and w10, w14, #0x1 |
| ; CHECK-GI-NEXT: umov.b w14, v0[4] |
| ; CHECK-GI-NEXT: strb w8, [sp, #11] |
| ; CHECK-GI-NEXT: umov.b w15, v0[1] |
| ; CHECK-GI-NEXT: umov.b w16, v0[3] |
| ; CHECK-GI-NEXT: orr w8, w9, w10, lsl #6 |
| ; CHECK-GI-NEXT: orr w9, w13, w11, lsl #2 |
| ; CHECK-GI-NEXT: and w10, w12, #0x1 |
| ; CHECK-GI-NEXT: and w11, w17, #0x1 |
| ; CHECK-GI-NEXT: umov.b w12, v0[5] |
| ; CHECK-GI-NEXT: umov.b w17, v0[0] |
| ; CHECK-GI-NEXT: orr w8, w8, w10, lsl #7 |
| ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #3 |
| ; CHECK-GI-NEXT: umov.b w10, v0[1] |
| ; CHECK-GI-NEXT: and w11, w14, #0x1 |
| ; CHECK-GI-NEXT: umov.b w14, v0[0] |
| ; CHECK-GI-NEXT: and w15, w15, #0x1 |
| ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #4 |
| ; CHECK-GI-NEXT: umov.b w11, v0[2] |
| ; CHECK-GI-NEXT: umov.b w13, v0[6] |
| ; CHECK-GI-NEXT: and w12, w12, #0x1 |
| ; CHECK-GI-NEXT: bfi w17, w15, #1, #31 |
| ; CHECK-GI-NEXT: umov.b w15, v0[5] |
| ; CHECK-GI-NEXT: orr w9, w9, w12, lsl #5 |
| ; CHECK-GI-NEXT: and w10, w10, #0x1 |
| ; CHECK-GI-NEXT: umov.b w12, v0[2] |
| ; CHECK-GI-NEXT: bfi w14, w10, #1, #31 |
| ; CHECK-GI-NEXT: umov.b w10, v0[4] |
| ; CHECK-GI-NEXT: ldr b1, [sp, #9] |
| ; CHECK-GI-NEXT: and w11, w11, #0x1 |
| ; CHECK-GI-NEXT: and w13, w13, #0x1 |
| ; CHECK-GI-NEXT: strb w8, [sp, #12] |
| ; CHECK-GI-NEXT: orr w11, w14, w11, lsl #2 |
| ; CHECK-GI-NEXT: and w14, w16, #0x1 |
| ; CHECK-GI-NEXT: umov.b w16, v0[4] |
| ; CHECK-GI-NEXT: and w12, w12, #0x1 |
| ; CHECK-GI-NEXT: and w15, w15, #0x1 |
| ; CHECK-GI-NEXT: orr w9, w9, w13, lsl #6 |
| ; CHECK-GI-NEXT: orr w11, w11, w14, lsl #3 |
| ; CHECK-GI-NEXT: orr w12, w17, w12, lsl #2 |
| ; CHECK-GI-NEXT: and w10, w10, #0x1 |
| ; CHECK-GI-NEXT: and w17, w0, #0x1 |
| ; CHECK-GI-NEXT: umov.b w0, v0[5] |
| ; CHECK-GI-NEXT: umov.b w14, v0[6] |
| ; CHECK-GI-NEXT: orr w10, w11, w10, lsl #4 |
| ; CHECK-GI-NEXT: orr w12, w12, w17, lsl #3 |
| ; CHECK-GI-NEXT: umov.b w11, v0[7] |
| ; CHECK-GI-NEXT: and w16, w16, #0x1 |
| ; CHECK-GI-NEXT: umov.b w17, v0[6] |
| ; CHECK-GI-NEXT: orr w10, w10, w15, lsl #5 |
| ; CHECK-GI-NEXT: umov.b w15, v0[7] |
| ; CHECK-GI-NEXT: orr w12, w12, w16, lsl #4 |
| ; CHECK-GI-NEXT: and w16, w0, #0x1 |
| ; CHECK-GI-NEXT: umov.b w0, v0[7] |
| ; CHECK-GI-NEXT: and w14, w14, #0x1 |
| ; CHECK-GI-NEXT: orr w12, w12, w16, lsl #5 |
| ; CHECK-GI-NEXT: orr w10, w10, w14, lsl #6 |
| ; CHECK-GI-NEXT: and w11, w11, #0x1 |
| ; CHECK-GI-NEXT: and w13, w17, #0x1 |
| ; CHECK-GI-NEXT: orr w9, w9, w11, lsl #7 |
| ; CHECK-GI-NEXT: mov.s v0[1], v1[0] |
| ; CHECK-GI-NEXT: orr w11, w12, w13, lsl #6 |
| ; CHECK-GI-NEXT: and w12, w15, #0x1 |
| ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0 |
| ; CHECK-GI-NEXT: orr w8, w10, w12, lsl #7 |
| ; CHECK-GI-NEXT: and w10, w0, #0x1 |
| ; CHECK-GI-NEXT: strb w9, [sp, #13] |
| ; CHECK-GI-NEXT: orr w9, w11, w10, lsl #7 |
| ; CHECK-GI-NEXT: strb w8, [sp, #14] |
| ; CHECK-GI-NEXT: strb w9, [sp, #15] |
| ; CHECK-GI-NEXT: add sp, sp, #16 |
| ; CHECK-GI-NEXT: ret |
| ; Reinterpret the sixteen i1 lanes as two i8 lanes (vector-to-vector bitcast). |
| %bc = bitcast <16 x i1> %arg to <2 x i8> |
| ret <2 x i8> %bc |
| } |