| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-none-elf %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE |
| ; RUN: llc -mtriple=aarch64_be-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE |
| |
| ; Vector select driven by an i8 bitmask: %mask is bitcast to <8 x i1> and |
| ; chooses, lane by lane, between the <8 x i32> values loaded from %if_true and |
| ; %if_false; the blended vector is stored to %out. |
| ; In both runs the expected code broadcasts the mask with dup, ANDs it with |
| ; constant-pool vectors, compares the result against zero with cmeq and blends |
| ; with bsl. Because cmeq ... #0 marks the lanes whose mask bit is clear, the |
| ; data loaded from %if_false (x3) is the first bsl source operand. |
| define void @if_then_else8(ptr %out, i8 %mask, ptr %if_true, ptr %if_false) { |
| ; CHECK-LE-LABEL: if_then_else8: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: adrp x8, .LCPI0_1 |
| ; CHECK-LE-NEXT: dup v0.4s, w1 |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI0_1] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI0_0 |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI0_0] |
| ; CHECK-LE-NEXT: ldp q4, q3, [x2] |
| ; CHECK-LE-NEXT: and v1.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-LE-NEXT: ldp q5, q2, [x3] |
| ; CHECK-LE-NEXT: cmeq v1.4s, v1.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v0.4s, v0.4s, #0 |
| ; CHECK-LE-NEXT: bsl v1.16b, v2.16b, v3.16b |
| ; CHECK-LE-NEXT: bsl v0.16b, v5.16b, v4.16b |
| ; CHECK-LE-NEXT: stp q0, q1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else8: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: adrp x8, .LCPI0_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_1 |
| ; CHECK-BE-NEXT: dup v0.4s, w1 |
| ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI0_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0 |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #16 |
| ; CHECK-BE-NEXT: add x9, x3, #16 |
| ; CHECK-BE-NEXT: ld1 { v3.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v4.4s }, [x2] |
| ; CHECK-BE-NEXT: ld1 { v5.4s }, [x3] |
| ; CHECK-BE-NEXT: and v1.16b, v0.16b, v1.16b |
| ; CHECK-BE-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: cmeq v1.4s, v1.4s, #0 |
| ; CHECK-BE-NEXT: cmeq v0.4s, v0.4s, #0 |
| ; CHECK-BE-NEXT: bsl v1.16b, v3.16b, v2.16b |
| ; CHECK-BE-NEXT: bsl v0.16b, v5.16b, v4.16b |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x0] |
| ; CHECK-BE-NEXT: ret |
| start: |
| %t = load <8 x i32>, ptr %if_true, align 4 |
| %f = load <8 x i32>, ptr %if_false, align 4 |
| %m = bitcast i8 %mask to <8 x i1> |
| %s = select <8 x i1> %m, <8 x i32> %t, <8 x i32> %f |
| store <8 x i32> %s, ptr %out, align 4 |
| ret void |
| } |
| |
| ; Same bitmask-driven select as the i8 variant, widened to an i16 mask and |
| ; <16 x i32> operands: %mask is bitcast to <16 x i1> and selects per lane |
| ; between the vectors loaded from %if_true and %if_false; the result is stored |
| ; to %out. |
| ; The expected code handles the data as four 128-bit pieces, each with its own |
| ; constant-pool vector (.LCPI1_0 through .LCPI1_3) ANDed against the dup'ed |
| ; mask, followed by cmeq ... #0 and a bitwise blend (bsl, plus bit in the |
| ; big-endian run). |
| define void @if_then_else16(ptr %out, i16 %mask, ptr %if_true, ptr %if_false) { |
| ; CHECK-LE-LABEL: if_then_else16: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: dup v0.4s, w1 |
| ; CHECK-LE-NEXT: adrp x8, .LCPI1_3 |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI1_3] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI1_2 |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI1_1 |
| ; CHECK-LE-NEXT: ldr q3, [x8, :lo12:.LCPI1_1] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI1_0 |
| ; CHECK-LE-NEXT: and v1.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI1_0] |
| ; CHECK-LE-NEXT: and v2.16b, v0.16b, v2.16b |
| ; CHECK-LE-NEXT: and v3.16b, v0.16b, v3.16b |
| ; CHECK-LE-NEXT: ldp q6, q7, [x3, #32] |
| ; CHECK-LE-NEXT: and v0.16b, v0.16b, v4.16b |
| ; CHECK-LE-NEXT: cmeq v1.4s, v1.4s, #0 |
| ; CHECK-LE-NEXT: ldp q4, q5, [x2, #32] |
| ; CHECK-LE-NEXT: cmeq v2.4s, v2.4s, #0 |
| ; CHECK-LE-NEXT: ldp q16, q17, [x3] |
| ; CHECK-LE-NEXT: cmeq v3.4s, v3.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v0.4s, v0.4s, #0 |
| ; CHECK-LE-NEXT: bsl v1.16b, v6.16b, v4.16b |
| ; CHECK-LE-NEXT: ldp q4, q6, [x2] |
| ; CHECK-LE-NEXT: bsl v2.16b, v7.16b, v5.16b |
| ; CHECK-LE-NEXT: bsl v3.16b, v16.16b, v4.16b |
| ; CHECK-LE-NEXT: bsl v0.16b, v17.16b, v6.16b |
| ; CHECK-LE-NEXT: stp q1, q2, [x0, #32] |
| ; CHECK-LE-NEXT: stp q3, q0, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else16: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: adrp x9, .LCPI1_3 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI1_3 |
| ; CHECK-BE-NEXT: add x8, x2, #32 |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: adrp x9, .LCPI1_2 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI1_2 |
| ; CHECK-BE-NEXT: ld1 { v3.4s }, [x9] |
| ; CHECK-BE-NEXT: adrp x9, .LCPI1_1 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI1_1 |
| ; CHECK-BE-NEXT: dup v0.4s, w1 |
| ; CHECK-BE-NEXT: ld1 { v5.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #48 |
| ; CHECK-BE-NEXT: add x9, x3, #48 |
| ; CHECK-BE-NEXT: ld1 { v7.4s }, [x2] |
| ; CHECK-BE-NEXT: ld1 { v4.4s }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI1_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI1_0 |
| ; CHECK-BE-NEXT: and v2.16b, v0.16b, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x8] |
| ; CHECK-BE-NEXT: and v3.16b, v0.16b, v3.16b |
| ; CHECK-BE-NEXT: and v5.16b, v0.16b, v5.16b |
| ; CHECK-BE-NEXT: add x8, x3, #32 |
| ; CHECK-BE-NEXT: ld1 { v17.4s }, [x3] |
| ; CHECK-BE-NEXT: ld1 { v16.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #16 |
| ; CHECK-BE-NEXT: cmeq v2.4s, v2.4s, #0 |
| ; CHECK-BE-NEXT: and v0.16b, v0.16b, v6.16b |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x9] |
| ; CHECK-BE-NEXT: cmeq v3.4s, v3.4s, #0 |
| ; CHECK-BE-NEXT: cmeq v5.4s, v5.4s, #0 |
| ; CHECK-BE-NEXT: add x9, x3, #16 |
| ; CHECK-BE-NEXT: bit v1.16b, v16.16b, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: ld1 { v16.4s }, [x9] |
| ; CHECK-BE-NEXT: cmeq v0.4s, v0.4s, #0 |
| ; CHECK-BE-NEXT: bsl v3.16b, v6.16b, v4.16b |
| ; CHECK-BE-NEXT: mov v4.16b, v5.16b |
| ; CHECK-BE-NEXT: add x8, x0, #32 |
| ; CHECK-BE-NEXT: bsl v4.16b, v17.16b, v7.16b |
| ; CHECK-BE-NEXT: bsl v0.16b, v16.16b, v2.16b |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #48 |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: st1 { v4.4s }, [x0] |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: ret |
| start: |
| %t = load <16 x i32>, ptr %if_true, align 4 |
| %f = load <16 x i32>, ptr %if_false, align 4 |
| %m = bitcast i16 %mask to <16 x i1> |
| %s = select <16 x i1> %m, <16 x i32> %t, <16 x i32> %f |
| store <16 x i32> %s, ptr %out, align 4 |
| ret void |
| } |
| |
| ; Bitmask-driven select with an i32 mask and <32 x i32> operands: %mask is |
| ; bitcast to <32 x i1> and selects per lane between the vectors loaded from |
| ; %if_true and %if_false; the result is stored to %out. |
| ; The expected code still broadcasts the mask once with dup and tests it |
| ; against eight constant-pool vectors (.LCPI2_0 through .LCPI2_7), one per |
| ; 128-bit piece of the data, using and + cmeq ... #0 and a bitwise blend |
| ; (bit in both runs, plus bsl in the little-endian run). |
| define void @if_then_else32(ptr %out, i32 %mask, ptr %if_true, ptr %if_false) { |
| ; CHECK-LE-LABEL: if_then_else32: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_0 |
| ; CHECK-LE-NEXT: dup v7.4s, w1 |
| ; CHECK-LE-NEXT: ldr q5, [x8, :lo12:.LCPI2_0] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_1 |
| ; CHECK-LE-NEXT: ldr q16, [x8, :lo12:.LCPI2_1] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_6 |
| ; CHECK-LE-NEXT: ldr q19, [x8, :lo12:.LCPI2_6] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_7 |
| ; CHECK-LE-NEXT: and v5.16b, v7.16b, v5.16b |
| ; CHECK-LE-NEXT: ldr q20, [x8, :lo12:.LCPI2_7] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_2 |
| ; CHECK-LE-NEXT: and v16.16b, v7.16b, v16.16b |
| ; CHECK-LE-NEXT: ldr q21, [x8, :lo12:.LCPI2_2] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_5 |
| ; CHECK-LE-NEXT: and v19.16b, v7.16b, v19.16b |
| ; CHECK-LE-NEXT: ldr q22, [x8, :lo12:.LCPI2_5] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_4 |
| ; CHECK-LE-NEXT: and v20.16b, v7.16b, v20.16b |
| ; CHECK-LE-NEXT: ldr q23, [x8, :lo12:.LCPI2_4] |
| ; CHECK-LE-NEXT: ldp q4, q6, [x2, #96] |
| ; CHECK-LE-NEXT: ldp q17, q18, [x3, #96] |
| ; CHECK-LE-NEXT: and v22.16b, v7.16b, v22.16b |
| ; CHECK-LE-NEXT: cmeq v19.4s, v19.4s, #0 |
| ; CHECK-LE-NEXT: and v23.16b, v7.16b, v23.16b |
| ; CHECK-LE-NEXT: cmeq v20.4s, v20.4s, #0 |
| ; CHECK-LE-NEXT: ldp q3, q2, [x2, #64] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI2_3 |
| ; CHECK-LE-NEXT: cmeq v22.4s, v22.4s, #0 |
| ; CHECK-LE-NEXT: ldr q24, [x8, :lo12:.LCPI2_3] |
| ; CHECK-LE-NEXT: and v21.16b, v7.16b, v21.16b |
| ; CHECK-LE-NEXT: bit v6.16b, v18.16b, v19.16b |
| ; CHECK-LE-NEXT: ldp q18, q19, [x3, #64] |
| ; CHECK-LE-NEXT: cmeq v23.4s, v23.4s, #0 |
| ; CHECK-LE-NEXT: bit v4.16b, v17.16b, v20.16b |
| ; CHECK-LE-NEXT: and v7.16b, v7.16b, v24.16b |
| ; CHECK-LE-NEXT: cmeq v16.4s, v16.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v5.4s, v5.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v21.4s, v21.4s, #0 |
| ; CHECK-LE-NEXT: bit v3.16b, v18.16b, v22.16b |
| ; CHECK-LE-NEXT: ldp q1, q0, [x2, #32] |
| ; CHECK-LE-NEXT: bit v2.16b, v19.16b, v23.16b |
| ; CHECK-LE-NEXT: ldp q24, q25, [x2] |
| ; CHECK-LE-NEXT: ldp q17, q20, [x3, #32] |
| ; CHECK-LE-NEXT: cmeq v7.4s, v7.4s, #0 |
| ; CHECK-LE-NEXT: ldp q18, q22, [x3] |
| ; CHECK-LE-NEXT: stp q4, q6, [x0, #96] |
| ; CHECK-LE-NEXT: mov v4.16b, v16.16b |
| ; CHECK-LE-NEXT: stp q3, q2, [x0, #64] |
| ; CHECK-LE-NEXT: mov v3.16b, v5.16b |
| ; CHECK-LE-NEXT: bit v1.16b, v17.16b, v7.16b |
| ; CHECK-LE-NEXT: bit v0.16b, v20.16b, v21.16b |
| ; CHECK-LE-NEXT: bsl v4.16b, v18.16b, v24.16b |
| ; CHECK-LE-NEXT: bsl v3.16b, v22.16b, v25.16b |
| ; CHECK-LE-NEXT: stp q1, q0, [x0, #32] |
| ; CHECK-LE-NEXT: stp q4, q3, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else32: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: add x8, x2, #96 |
| ; CHECK-BE-NEXT: dup v19.4s, w1 |
| ; CHECK-BE-NEXT: add x9, x2, #112 |
| ; CHECK-BE-NEXT: ld1 { v5.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #80 |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #32 |
| ; CHECK-BE-NEXT: add x9, x2, #48 |
| ; CHECK-BE-NEXT: ld1 { v4.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #16 |
| ; CHECK-BE-NEXT: adrp x12, .LCPI2_7 |
| ; CHECK-BE-NEXT: add x12, x12, :lo12:.LCPI2_7 |
| ; CHECK-BE-NEXT: ld1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #96 |
| ; CHECK-BE-NEXT: ld1 { v7.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #112 |
| ; CHECK-BE-NEXT: add x10, x2, #64 |
| ; CHECK-BE-NEXT: ld1 { v17.4s }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI2_5 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_5 |
| ; CHECK-BE-NEXT: ld1 { v20.4s }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI2_6 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_6 |
| ; CHECK-BE-NEXT: ld1 { v21.4s }, [x8] |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x3, #64 |
| ; CHECK-BE-NEXT: ld1 { v22.4s }, [x12] |
| ; CHECK-BE-NEXT: ld1 { v16.4s }, [x10] |
| ; CHECK-BE-NEXT: ld1 { v18.4s }, [x9] |
| ; CHECK-BE-NEXT: and v20.16b, v19.16b, v20.16b |
| ; CHECK-BE-NEXT: add x8, x3, #80 |
| ; CHECK-BE-NEXT: adrp x12, .LCPI2_4 |
| ; CHECK-BE-NEXT: add x12, x12, :lo12:.LCPI2_4 |
| ; CHECK-BE-NEXT: and v21.16b, v19.16b, v21.16b |
| ; CHECK-BE-NEXT: ld1 { v23.4s }, [x8] |
| ; CHECK-BE-NEXT: ld1 { v24.4s }, [x12] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI2_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_3 |
| ; CHECK-BE-NEXT: cmeq v20.4s, v20.4s, #0 |
| ; CHECK-BE-NEXT: and v22.16b, v19.16b, v22.16b |
| ; CHECK-BE-NEXT: adrp x11, .LCPI2_2 |
| ; CHECK-BE-NEXT: add x11, x11, :lo12:.LCPI2_2 |
| ; CHECK-BE-NEXT: cmeq v21.4s, v21.4s, #0 |
| ; CHECK-BE-NEXT: adrp x9, .LCPI2_0 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI2_0 |
| ; CHECK-BE-NEXT: adrp x10, .LCPI2_1 |
| ; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI2_1 |
| ; CHECK-BE-NEXT: bit v16.16b, v18.16b, v20.16b |
| ; CHECK-BE-NEXT: ld1 { v18.4s }, [x8] |
| ; CHECK-BE-NEXT: ld1 { v25.4s }, [x9] |
| ; CHECK-BE-NEXT: bit v6.16b, v17.16b, v21.16b |
| ; CHECK-BE-NEXT: ld1 { v17.4s }, [x11] |
| ; CHECK-BE-NEXT: ld1 { v20.4s }, [x10] |
| ; CHECK-BE-NEXT: and v21.16b, v19.16b, v24.16b |
| ; CHECK-BE-NEXT: cmeq v22.4s, v22.4s, #0 |
| ; CHECK-BE-NEXT: add x8, x3, #32 |
| ; CHECK-BE-NEXT: and v18.16b, v19.16b, v18.16b |
| ; CHECK-BE-NEXT: and v25.16b, v19.16b, v25.16b |
| ; CHECK-BE-NEXT: ld1 { v24.4s }, [x8] |
| ; CHECK-BE-NEXT: and v17.16b, v19.16b, v17.16b |
| ; CHECK-BE-NEXT: and v20.16b, v19.16b, v20.16b |
| ; CHECK-BE-NEXT: add x8, x3, #48 |
| ; CHECK-BE-NEXT: cmeq v21.4s, v21.4s, #0 |
| ; CHECK-BE-NEXT: bit v5.16b, v7.16b, v22.16b |
| ; CHECK-BE-NEXT: ld1 { v19.4s }, [x8] |
| ; CHECK-BE-NEXT: cmeq v18.4s, v18.4s, #0 |
| ; CHECK-BE-NEXT: add x8, x3, #16 |
| ; CHECK-BE-NEXT: ld1 { v3.4s }, [x2] |
| ; CHECK-BE-NEXT: cmeq v17.4s, v17.4s, #0 |
| ; CHECK-BE-NEXT: ld1 { v7.4s }, [x3] |
| ; CHECK-BE-NEXT: ld1 { v22.4s }, [x8] |
| ; CHECK-BE-NEXT: cmeq v25.4s, v25.4s, #0 |
| ; CHECK-BE-NEXT: cmeq v20.4s, v20.4s, #0 |
| ; CHECK-BE-NEXT: bit v1.16b, v23.16b, v21.16b |
| ; CHECK-BE-NEXT: add x8, x0, #96 |
| ; CHECK-BE-NEXT: bit v4.16b, v24.16b, v18.16b |
| ; CHECK-BE-NEXT: st1 { v5.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #112 |
| ; CHECK-BE-NEXT: bit v2.16b, v19.16b, v17.16b |
| ; CHECK-BE-NEXT: st1 { v6.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #64 |
| ; CHECK-BE-NEXT: bit v3.16b, v7.16b, v20.16b |
| ; CHECK-BE-NEXT: st1 { v16.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #80 |
| ; CHECK-BE-NEXT: bit v0.16b, v22.16b, v25.16b |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #32 |
| ; CHECK-BE-NEXT: st1 { v4.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #48 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x0] |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: ret |
| start: |
| %t = load <32 x i32>, ptr %if_true, align 4 |
| %f = load <32 x i32>, ptr %if_false, align 4 |
| %m = bitcast i32 %mask to <32 x i1> |
| %s = select <32 x i1> %m, <32 x i32> %t, <32 x i32> %f |
| store <32 x i32> %s, ptr %out, align 4 |
| ret void |
| } |
| |
| ; Bitmask-driven select with an i64 mask and <64 x i32> operands: %mask is |
| ; bitcast to <64 x i1> and selects per lane between the vectors loaded from |
| ; %if_true and %if_false; the result is stored to %out. |
| ; The mask no longer fits a single 32-bit dup: the expected code moves x1 into |
| ; a vector register with fmov and broadcasts its two 32-bit halves separately |
| ; (dup from lanes s[0] and s[1]; the big-endian run applies rev64 first). Both |
| ; broadcasts are tested against the same eight constant-pool vectors |
| ; (.LCPI3_0 through .LCPI3_7) and blended with bsl/bit/bif. |
| ; Register pressure is high enough that callee-saved d8-d15 (little-endian run) |
| ; or d8-d11 (big-endian run) are spilled and reloaded around the body. |
| ; NOTE(review): this is the only function here marked nounwind - presumably to |
| ; keep CFI directives for those spills out of the expected output; confirm. |
| define void @if_then_else64(ptr %out, i64 %mask, ptr %if_true, ptr %if_false) nounwind { |
| ; CHECK-LE-LABEL: if_then_else64: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: sub sp, sp, #80 |
| ; CHECK-LE-NEXT: fmov d21, x1 |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_6 |
| ; CHECK-LE-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-LE-NEXT: ldr q26, [x8, :lo12:.LCPI3_6] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_7 |
| ; CHECK-LE-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-LE-NEXT: ldr q27, [x8, :lo12:.LCPI3_7] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_0 |
| ; CHECK-LE-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-LE-NEXT: dup v5.4s, v21.s[0] |
| ; CHECK-LE-NEXT: dup v28.4s, v21.s[1] |
| ; CHECK-LE-NEXT: ldr q22, [x8, :lo12:.LCPI3_0] |
| ; CHECK-LE-NEXT: ldp q25, q24, [x2, #96] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_3 |
| ; CHECK-LE-NEXT: ldp q21, q31, [x3, #96] |
| ; CHECK-LE-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-LE-NEXT: and v23.16b, v5.16b, v26.16b |
| ; CHECK-LE-NEXT: and v9.16b, v5.16b, v27.16b |
| ; CHECK-LE-NEXT: and v10.16b, v28.16b, v22.16b |
| ; CHECK-LE-NEXT: ldp q16, q0, [x2, #128] |
| ; CHECK-LE-NEXT: and v27.16b, v28.16b, v27.16b |
| ; CHECK-LE-NEXT: ldp q1, q7, [x2, #160] |
| ; CHECK-LE-NEXT: and v26.16b, v28.16b, v26.16b |
| ; CHECK-LE-NEXT: cmeq v8.4s, v23.4s, #0 |
| ; CHECK-LE-NEXT: ldr q23, [x8, :lo12:.LCPI3_3] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_4 |
| ; CHECK-LE-NEXT: ldr q13, [x8, :lo12:.LCPI3_4] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_5 |
| ; CHECK-LE-NEXT: cmeq v27.4s, v27.4s, #0 |
| ; CHECK-LE-NEXT: ldp q6, q2, [x2, #192] |
| ; CHECK-LE-NEXT: cmeq v26.4s, v26.4s, #0 |
| ; CHECK-LE-NEXT: bif v21.16b, v25.16b, v8.16b |
| ; CHECK-LE-NEXT: cmeq v25.4s, v9.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v8.4s, v10.4s, #0 |
| ; CHECK-LE-NEXT: and v9.16b, v28.16b, v23.16b |
| ; CHECK-LE-NEXT: ldp q14, q10, [x3, #128] |
| ; CHECK-LE-NEXT: and v15.16b, v28.16b, v13.16b |
| ; CHECK-LE-NEXT: ldp q12, q11, [x3, #192] |
| ; CHECK-LE-NEXT: bit v24.16b, v31.16b, v25.16b |
| ; CHECK-LE-NEXT: ldr q25, [x8, :lo12:.LCPI3_5] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_2 |
| ; CHECK-LE-NEXT: bit v16.16b, v14.16b, v8.16b |
| ; CHECK-LE-NEXT: cmeq v31.4s, v9.4s, #0 |
| ; CHECK-LE-NEXT: str q0, [sp] // 16-byte Spill |
| ; CHECK-LE-NEXT: ldp q9, q8, [x3, #160] |
| ; CHECK-LE-NEXT: cmeq v14.4s, v15.4s, #0 |
| ; CHECK-LE-NEXT: ldp q3, q4, [x2, #224] |
| ; CHECK-LE-NEXT: and v15.16b, v28.16b, v25.16b |
| ; CHECK-LE-NEXT: ldp q30, q29, [x3, #224] |
| ; CHECK-LE-NEXT: bit v7.16b, v8.16b, v31.16b |
| ; CHECK-LE-NEXT: ldr q31, [x8, :lo12:.LCPI3_2] |
| ; CHECK-LE-NEXT: bit v6.16b, v12.16b, v14.16b |
| ; CHECK-LE-NEXT: cmeq v14.4s, v15.4s, #0 |
| ; CHECK-LE-NEXT: ldp q17, q19, [x2, #64] |
| ; CHECK-LE-NEXT: and v12.16b, v28.16b, v31.16b |
| ; CHECK-LE-NEXT: bit v4.16b, v29.16b, v27.16b |
| ; CHECK-LE-NEXT: bit v3.16b, v30.16b, v26.16b |
| ; CHECK-LE-NEXT: ldp q18, q20, [x2, #32] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI3_1 |
| ; CHECK-LE-NEXT: bit v2.16b, v11.16b, v14.16b |
| ; CHECK-LE-NEXT: ldp q29, q27, [x2] |
| ; CHECK-LE-NEXT: cmeq v12.4s, v12.4s, #0 |
| ; CHECK-LE-NEXT: ldp q30, q26, [x3, #64] |
| ; CHECK-LE-NEXT: ldp q14, q11, [x3, #32] |
| ; CHECK-LE-NEXT: ldr q8, [x8, :lo12:.LCPI3_1] |
| ; CHECK-LE-NEXT: ldp q0, q15, [x3] |
| ; CHECK-LE-NEXT: stp q21, q24, [x0, #96] |
| ; CHECK-LE-NEXT: bit v1.16b, v9.16b, v12.16b |
| ; CHECK-LE-NEXT: stp q6, q2, [x0, #192] |
| ; CHECK-LE-NEXT: and v28.16b, v28.16b, v8.16b |
| ; CHECK-LE-NEXT: stp q3, q4, [x0, #224] |
| ; CHECK-LE-NEXT: and v4.16b, v5.16b, v25.16b |
| ; CHECK-LE-NEXT: and v3.16b, v5.16b, v13.16b |
| ; CHECK-LE-NEXT: and v2.16b, v5.16b, v23.16b |
| ; CHECK-LE-NEXT: and v6.16b, v5.16b, v31.16b |
| ; CHECK-LE-NEXT: ldr q25, [sp] // 16-byte Reload |
| ; CHECK-LE-NEXT: cmeq v28.4s, v28.4s, #0 |
| ; CHECK-LE-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-LE-NEXT: cmeq v4.4s, v4.4s, #0 |
| ; CHECK-LE-NEXT: stp q1, q7, [x0, #160] |
| ; CHECK-LE-NEXT: and v7.16b, v5.16b, v8.16b |
| ; CHECK-LE-NEXT: and v5.16b, v5.16b, v22.16b |
| ; CHECK-LE-NEXT: cmeq v3.4s, v3.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v1.4s, v2.4s, #0 |
| ; CHECK-LE-NEXT: bit v25.16b, v10.16b, v28.16b |
| ; CHECK-LE-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-LE-NEXT: mov v2.16b, v4.16b |
| ; CHECK-LE-NEXT: cmeq v4.4s, v6.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v6.4s, v7.4s, #0 |
| ; CHECK-LE-NEXT: cmeq v5.4s, v5.4s, #0 |
| ; CHECK-LE-NEXT: bsl v3.16b, v30.16b, v17.16b |
| ; CHECK-LE-NEXT: bsl v1.16b, v11.16b, v20.16b |
| ; CHECK-LE-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-LE-NEXT: bsl v2.16b, v26.16b, v19.16b |
| ; CHECK-LE-NEXT: bsl v4.16b, v14.16b, v18.16b |
| ; CHECK-LE-NEXT: bsl v6.16b, v15.16b, v27.16b |
| ; CHECK-LE-NEXT: bif v0.16b, v29.16b, v5.16b |
| ; CHECK-LE-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-LE-NEXT: stp q16, q25, [x0, #128] |
| ; CHECK-LE-NEXT: stp q4, q1, [x0, #32] |
| ; CHECK-LE-NEXT: stp q0, q6, [x0] |
| ; CHECK-LE-NEXT: stp q3, q2, [x0, #64] |
| ; CHECK-LE-NEXT: add sp, sp, #80 |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else64: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: stp d11, d10, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-BE-NEXT: fmov d5, x1 |
| ; CHECK-BE-NEXT: add x9, x2, #224 |
| ; CHECK-BE-NEXT: add x8, x2, #240 |
| ; CHECK-BE-NEXT: ld1 { v1.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x2, #192 |
| ; CHECK-BE-NEXT: adrp x10, .LCPI3_3 |
| ; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI3_3 |
| ; CHECK-BE-NEXT: ld1 { v3.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x2, #128 |
| ; CHECK-BE-NEXT: rev64 v17.4s, v5.4s |
| ; CHECK-BE-NEXT: ld1 { v18.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x2, #80 |
| ; CHECK-BE-NEXT: ld1 { v19.4s }, [x9] |
| ; CHECK-BE-NEXT: adrp x9, .LCPI3_4 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_4 |
| ; CHECK-BE-NEXT: ld1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #208 |
| ; CHECK-BE-NEXT: ld1 { v24.4s }, [x10] |
| ; CHECK-BE-NEXT: ld1 { v28.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x3, #96 |
| ; CHECK-BE-NEXT: ld1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: dup v7.4s, v17.s[0] |
| ; CHECK-BE-NEXT: add x8, x2, #176 |
| ; CHECK-BE-NEXT: ld1 { v23.4s }, [x9] |
| ; CHECK-BE-NEXT: add x9, x3, #48 |
| ; CHECK-BE-NEXT: ld1 { v4.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #160 |
| ; CHECK-BE-NEXT: ld1 { v29.4s }, [x9] |
| ; CHECK-BE-NEXT: adrp x9, .LCPI3_5 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_5 |
| ; CHECK-BE-NEXT: ld1 { v5.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #144 |
| ; CHECK-BE-NEXT: and v16.16b, v7.16b, v24.16b |
| ; CHECK-BE-NEXT: and v30.16b, v7.16b, v28.16b |
| ; CHECK-BE-NEXT: ld1 { v25.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v6.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #96 |
| ; CHECK-BE-NEXT: adrp x9, .LCPI3_6 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_6 |
| ; CHECK-BE-NEXT: ld1 { v22.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #48 |
| ; CHECK-BE-NEXT: ld1 { v27.4s }, [x9] |
| ; CHECK-BE-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-BE-NEXT: ld1 { v20.4s }, [x8] |
| ; CHECK-BE-NEXT: cmeq v16.4s, v16.4s, #0 |
| ; CHECK-BE-NEXT: and v8.16b, v7.16b, v25.16b |
| ; CHECK-BE-NEXT: cmeq v30.4s, v30.4s, #0 |
| ; CHECK-BE-NEXT: add x8, x3, #80 |
| ; CHECK-BE-NEXT: adrp x9, .LCPI3_7 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_7 |
| ; CHECK-BE-NEXT: add x10, x2, #64 |
| ; CHECK-BE-NEXT: ld1 { v26.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #64 |
| ; CHECK-BE-NEXT: ld1 { v31.4s }, [x9] |
| ; CHECK-BE-NEXT: adrp x9, .LCPI3_0 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_0 |
| ; CHECK-BE-NEXT: and v10.16b, v7.16b, v27.16b |
| ; CHECK-BE-NEXT: ld1 { v21.4s }, [x10] |
| ; CHECK-BE-NEXT: ld1 { v9.4s }, [x8] |
| ; CHECK-BE-NEXT: bsl v16.16b, v29.16b, v20.16b |
| ; CHECK-BE-NEXT: ld1 { v20.4s }, [x9] |
| ; CHECK-BE-NEXT: cmeq v8.4s, v8.4s, #0 |
| ; CHECK-BE-NEXT: dup v29.4s, v17.s[1] |
| ; CHECK-BE-NEXT: mov v17.16b, v30.16b |
| ; CHECK-BE-NEXT: add x8, x2, #112 |
| ; CHECK-BE-NEXT: and v11.16b, v7.16b, v31.16b |
| ; CHECK-BE-NEXT: cmeq v30.4s, v10.4s, #0 |
| ; CHECK-BE-NEXT: add x9, x3, #112 |
| ; CHECK-BE-NEXT: bit v19.16b, v26.16b, v8.16b |
| ; CHECK-BE-NEXT: and v8.16b, v29.16b, v20.16b |
| ; CHECK-BE-NEXT: ld1 { v10.4s }, [x9] |
| ; CHECK-BE-NEXT: bsl v17.16b, v9.16b, v21.16b |
| ; CHECK-BE-NEXT: ld1 { v9.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #32 |
| ; CHECK-BE-NEXT: ld1 { v21.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #16 |
| ; CHECK-BE-NEXT: cmeq v26.4s, v11.4s, #0 |
| ; CHECK-BE-NEXT: bif v23.16b, v22.16b, v30.16b |
| ; CHECK-BE-NEXT: ld1 { v22.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #128 |
| ; CHECK-BE-NEXT: and v30.16b, v29.16b, v24.16b |
| ; CHECK-BE-NEXT: ld1 { v11.4s }, [x8] |
| ; CHECK-BE-NEXT: cmeq v8.4s, v8.4s, #0 |
| ; CHECK-BE-NEXT: add x9, x3, #176 |
| ; CHECK-BE-NEXT: bsl v26.16b, v10.16b, v9.16b |
| ; CHECK-BE-NEXT: add x8, x3, #240 |
| ; CHECK-BE-NEXT: ld1 { v9.4s }, [x9] |
| ; CHECK-BE-NEXT: and v28.16b, v29.16b, v28.16b |
| ; CHECK-BE-NEXT: and v31.16b, v29.16b, v31.16b |
| ; CHECK-BE-NEXT: cmeq v30.4s, v30.4s, #0 |
| ; CHECK-BE-NEXT: bit v18.16b, v11.16b, v8.16b |
| ; CHECK-BE-NEXT: ld1 { v8.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #224 |
| ; CHECK-BE-NEXT: adrp x9, .LCPI3_2 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_2 |
| ; CHECK-BE-NEXT: ld1 { v10.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #192 |
| ; CHECK-BE-NEXT: cmeq v28.4s, v28.4s, #0 |
| ; CHECK-BE-NEXT: bit v4.16b, v9.16b, v30.16b |
| ; CHECK-BE-NEXT: ld1 { v30.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #208 |
| ; CHECK-BE-NEXT: ld1 { v9.4s }, [x8] |
| ; CHECK-BE-NEXT: and v27.16b, v29.16b, v27.16b |
| ; CHECK-BE-NEXT: cmeq v31.4s, v31.4s, #0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI3_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI3_1 |
| ; CHECK-BE-NEXT: and v25.16b, v29.16b, v25.16b |
| ; CHECK-BE-NEXT: bit v3.16b, v30.16b, v28.16b |
| ; CHECK-BE-NEXT: ld1 { v28.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v30.4s }, [x8] |
| ; CHECK-BE-NEXT: cmeq v27.4s, v27.4s, #0 |
| ; CHECK-BE-NEXT: bit v2.16b, v8.16b, v31.16b |
| ; CHECK-BE-NEXT: add x8, x3, #160 |
| ; CHECK-BE-NEXT: cmeq v25.4s, v25.4s, #0 |
| ; CHECK-BE-NEXT: ld1 { v31.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #144 |
| ; CHECK-BE-NEXT: and v8.16b, v29.16b, v28.16b |
| ; CHECK-BE-NEXT: and v29.16b, v29.16b, v30.16b |
| ; CHECK-BE-NEXT: add x9, x3, #32 |
| ; CHECK-BE-NEXT: bit v1.16b, v10.16b, v27.16b |
| ; CHECK-BE-NEXT: ld1 { v27.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x3, #16 |
| ; CHECK-BE-NEXT: bit v0.16b, v9.16b, v25.16b |
| ; CHECK-BE-NEXT: ld1 { v25.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #240 |
| ; CHECK-BE-NEXT: cmeq v8.4s, v8.4s, #0 |
| ; CHECK-BE-NEXT: cmeq v29.4s, v29.4s, #0 |
| ; CHECK-BE-NEXT: ld1 { v24.4s }, [x2] |
| ; CHECK-BE-NEXT: ld1 { v10.4s }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v9.4s }, [x3] |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #224 |
| ; CHECK-BE-NEXT: mov v2.16b, v8.16b |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: mov v1.16b, v29.16b |
| ; CHECK-BE-NEXT: add x8, x0, #208 |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #192 |
| ; CHECK-BE-NEXT: and v0.16b, v7.16b, v28.16b |
| ; CHECK-BE-NEXT: bsl v2.16b, v31.16b, v5.16b |
| ; CHECK-BE-NEXT: bsl v1.16b, v27.16b, v6.16b |
| ; CHECK-BE-NEXT: st1 { v3.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #176 |
| ; CHECK-BE-NEXT: and v3.16b, v7.16b, v30.16b |
| ; CHECK-BE-NEXT: st1 { v4.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #160 |
| ; CHECK-BE-NEXT: and v4.16b, v7.16b, v20.16b |
| ; CHECK-BE-NEXT: cmeq v0.4s, v0.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #144 |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #128 |
| ; CHECK-BE-NEXT: cmeq v1.4s, v3.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v18.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #112 |
| ; CHECK-BE-NEXT: cmeq v2.4s, v4.4s, #0 |
| ; CHECK-BE-NEXT: st1 { v26.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #96 |
| ; CHECK-BE-NEXT: bsl v0.16b, v10.16b, v21.16b |
| ; CHECK-BE-NEXT: st1 { v23.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #80 |
| ; CHECK-BE-NEXT: bsl v1.16b, v25.16b, v22.16b |
| ; CHECK-BE-NEXT: st1 { v19.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #64 |
| ; CHECK-BE-NEXT: bsl v2.16b, v9.16b, v24.16b |
| ; CHECK-BE-NEXT: st1 { v17.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #48 |
| ; CHECK-BE-NEXT: st1 { v16.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #32 |
| ; CHECK-BE-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-BE-NEXT: st1 { v0.4s }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: st1 { v2.4s }, [x0] |
| ; CHECK-BE-NEXT: st1 { v1.4s }, [x8] |
| ; CHECK-BE-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-BE-NEXT: ret |
| start: |
| %if_true.val = load <64 x i32>, ptr %if_true, align 4 |
| %if_false.val = load <64 x i32>, ptr %if_false, align 4 |
| %0 = bitcast i64 %mask to <64 x i1> |
| %1 = select <64 x i1> %0, <64 x i32> %if_true.val, <64 x i32> %if_false.val |
| store <64 x i32> %1, ptr %out, align 4 |
| ret void |
| } |
| |
| ; Expands an i8 bitmask into a byte vector: %x is bitcast to <8 x i1> and |
| ; zero-extended to <8 x i8>, which is returned. |
| ; The expected code broadcasts the byte with dup, tests it against a |
| ; constant-pool vector with cmtst, and then shifts each byte right by 7 (ushr) |
| ; as the zero-extension step. The big-endian run additionally reverses the |
| ; bytes of the result with rev64. |
| define <8 x i8> @broadcast_u8_to_v8i8_zext(i8 %x) { |
| ; CHECK-LE-LABEL: broadcast_u8_to_v8i8_zext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: dup v0.8b, w0 |
| ; CHECK-LE-NEXT: adrp x8, .LCPI4_0 |
| ; CHECK-LE-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] |
| ; CHECK-LE-NEXT: cmtst v0.8b, v0.8b, v1.8b |
| ; CHECK-LE-NEXT: ushr v0.8b, v0.8b, #7 |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u8_to_v8i8_zext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: dup v0.8b, w0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI4_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI4_0 |
| ; CHECK-BE-NEXT: ld1 { v1.8b }, [x8] |
| ; CHECK-BE-NEXT: cmtst v0.8b, v0.8b, v1.8b |
| ; CHECK-BE-NEXT: ushr v0.8b, v0.8b, #7 |
| ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b |
| ; CHECK-BE-NEXT: ret |
| %v1 = bitcast i8 %x to <8 x i1> |
| %v8 = zext <8 x i1> %v1 to <8 x i8> |
| ret <8 x i8> %v8 |
| } |
| |
| ; Sign-extending counterpart of the i8 bitmask expansion: %x is bitcast to |
| ; <8 x i1> and sign-extended to <8 x i8>, which is returned. |
| ; The expected code is dup + cmtst against a constant-pool vector with no |
| ; trailing ushr, i.e. the cmtst result is used directly as the sign-extended |
| ; value. The big-endian run adds a rev64 on the result. |
| define <8 x i8> @broadcast_u8_to_v8i8_sext(i8 %x) { |
| ; CHECK-LE-LABEL: broadcast_u8_to_v8i8_sext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: dup v0.8b, w0 |
| ; CHECK-LE-NEXT: adrp x8, .LCPI5_0 |
| ; CHECK-LE-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] |
| ; CHECK-LE-NEXT: cmtst v0.8b, v0.8b, v1.8b |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u8_to_v8i8_sext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: dup v0.8b, w0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI5_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI5_0 |
| ; CHECK-BE-NEXT: ld1 { v1.8b }, [x8] |
| ; CHECK-BE-NEXT: cmtst v0.8b, v0.8b, v1.8b |
| ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b |
| ; CHECK-BE-NEXT: ret |
| %v1 = bitcast i8 %x to <8 x i1> |
| %v8 = sext <8 x i1> %v1 to <8 x i8> |
| ret <8 x i8> %v8 |
| } |
| |
| ; Expands an i16 bitmask into a byte vector: %x is bitcast to <16 x i1> and |
| ; zero-extended to <16 x i8>, which is returned. |
| ; The mask spans two bytes, so instead of a plain dup the expected code moves |
| ; it into a vector register (fmov) and spreads its bytes across the lanes with |
| ; tbl, using an index vector from the constant pool (.LCPI6_0). A second |
| ; constant (.LCPI6_1) feeds cmtst, and ushr #7 is the zero-extension step. |
| ; The big-endian run adds rev16 before the tbl and rev64 + ext on the result. |
| define <16 x i8> @broadcast_u16_to_v16i8_zext(i16 %x) { |
| ; CHECK-LE-LABEL: broadcast_u16_to_v16i8_zext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: adrp x8, .LCPI6_0 |
| ; CHECK-LE-NEXT: fmov s1, w0 |
| ; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI6_0] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI6_1 |
| ; CHECK-LE-NEXT: tbl v0.16b, { v1.16b }, v0.16b |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI6_1] |
| ; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: ushr v0.16b, v0.16b, #7 |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u16_to_v16i8_zext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: fmov s0, w0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI6_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_0 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI6_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_1 |
| ; CHECK-BE-NEXT: rev16 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v1.16b |
| ; CHECK-BE-NEXT: ushr v0.16b, v0.16b, #7 |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: ret |
| %v1 = bitcast i16 %x to <16 x i1> |
| %v8 = zext <16 x i1> %v1 to <16 x i8> |
| ret <16 x i8> %v8 |
| } |
| |
| ; Sign-extending counterpart of the i16 bitmask expansion: %x is bitcast to |
| ; <16 x i1> and sign-extended to <16 x i8>, which is returned. |
| ; The expected code is the fmov + tbl + cmtst sequence, with constant-pool |
| ; operands .LCPI7_0 (tbl indices) and .LCPI7_1 (cmtst operand), and no |
| ; trailing ushr. The big-endian run adds rev16 before the tbl and |
| ; rev64 + ext on the result. |
| define <16 x i8> @broadcast_u16_to_v16i8_sext(i16 %x) { |
| ; CHECK-LE-LABEL: broadcast_u16_to_v16i8_sext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: adrp x8, .LCPI7_0 |
| ; CHECK-LE-NEXT: fmov s1, w0 |
| ; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI7_0] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI7_1 |
| ; CHECK-LE-NEXT: tbl v0.16b, { v1.16b }, v0.16b |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI7_1] |
| ; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u16_to_v16i8_sext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: fmov s0, w0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI7_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI7_0 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI7_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI7_1 |
| ; CHECK-BE-NEXT: rev16 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v1.16b |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: ret |
| %v1 = bitcast i16 %x to <16 x i1> |
| %v8 = sext <16 x i1> %v1 to <16 x i8> |
| ret <16 x i8> %v8 |
| } |
| |
| ; Expands an i32 bitmask into a byte vector: %x is bitcast to <32 x i1> and |
| ; zero-extended to <32 x i8>, which is returned in two 128-bit registers |
| ; (v0 and v1). |
| ; The expected code moves the mask into a vector register (fmov) and runs two |
| ; tbl lookups with separate index vectors (.LCPI8_0 and .LCPI8_2), one per |
| ; result half. Both halves share one cmtst operand (.LCPI8_1) and are finished |
| ; with ushr #7. The big-endian run adds rev32 before the tbl lookups and |
| ; rev64 + ext on each result register. |
| define <32 x i8> @broadcast_u32_to_v32i8_zext(i32 %x) { |
| ; CHECK-LE-LABEL: broadcast_u32_to_v32i8_zext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: adrp x8, .LCPI8_0 |
| ; CHECK-LE-NEXT: adrp x9, .LCPI8_2 |
| ; CHECK-LE-NEXT: fmov s2, w0 |
| ; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] |
| ; CHECK-LE-NEXT: ldr q1, [x9, :lo12:.LCPI8_2] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI8_1 |
| ; CHECK-LE-NEXT: tbl v0.16b, { v2.16b }, v0.16b |
| ; CHECK-LE-NEXT: tbl v1.16b, { v2.16b }, v1.16b |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI8_1] |
| ; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v2.16b |
| ; CHECK-LE-NEXT: cmtst v1.16b, v1.16b, v2.16b |
| ; CHECK-LE-NEXT: ushr v0.16b, v0.16b, #7 |
| ; CHECK-LE-NEXT: ushr v1.16b, v1.16b, #7 |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u32_to_v32i8_zext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: fmov s0, w0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI8_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI8_0 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI8_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI8_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI8_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI8_1 |
| ; CHECK-BE-NEXT: rev32 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v2.16b |
| ; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v2.16b |
| ; CHECK-BE-NEXT: ushr v1.16b, v1.16b, #7 |
| ; CHECK-BE-NEXT: ushr v0.16b, v0.16b, #7 |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b |
| ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ret |
| %v1 = bitcast i32 %x to <32 x i1> |
| %v8 = zext <32 x i1> %v1 to <32 x i8> |
| ret <32 x i8> %v8 |
| } |
| |
| ; Expands each of the 32 bits of %x into its own byte lane holding 0 or -1 |
| ; (bitcast to <32 x i1>, then sext to <32 x i8>). |
| ; The assertions below expect a tbl followed by cmtst for each of the two |
| ; 128-bit result registers, with no trailing shift (unlike the zext variant |
| ; of this test, the all-ones cmtst result is used directly). |
| ; The big-endian assertions additionally expect rev32 on the input and |
| ; rev64 + ext #8 on each result. |
| define <32 x i8> @broadcast_u32_to_v32i8_sext(i32 %x) { |
| ; CHECK-LE-LABEL: broadcast_u32_to_v32i8_sext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: adrp x8, .LCPI9_0 |
| ; CHECK-LE-NEXT: adrp x9, .LCPI9_2 |
| ; CHECK-LE-NEXT: fmov s2, w0 |
| ; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI9_0] |
| ; CHECK-LE-NEXT: ldr q1, [x9, :lo12:.LCPI9_2] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI9_1 |
| ; CHECK-LE-NEXT: tbl v0.16b, { v2.16b }, v0.16b |
| ; CHECK-LE-NEXT: tbl v1.16b, { v2.16b }, v1.16b |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI9_1] |
| ; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v2.16b |
| ; CHECK-LE-NEXT: cmtst v1.16b, v1.16b, v2.16b |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u32_to_v32i8_sext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: fmov s0, w0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI9_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI9_0 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI9_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI9_2 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI9_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI9_1 |
| ; CHECK-BE-NEXT: rev32 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v2.16b |
| ; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v2.16b |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: rev64 v1.16b, v1.16b |
| ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 |
| ; CHECK-BE-NEXT: ret |
| ; Reinterpret the scalar as 32 one-bit lanes, then sign-extend each lane to a byte (0 or -1). |
| %v1 = bitcast i32 %x to <32 x i1> |
| %v8 = sext <32 x i1> %v1 to <32 x i8> |
| ret <32 x i8> %v8 |
| } |
| |
| ; Expands each of the 64 bits of %x into its own byte lane holding 0 or 1 |
| ; (bitcast to <64 x i1>, then zext to <64 x i8>). |
| ; The result occupies four 128-bit registers (v0-v3). For each one the |
| ; assertions below expect a tbl with its own index constant, a cmtst against |
| ; one shared constant-pool vector, and ushr #7. |
| ; The big-endian assertions additionally expect rev64 on the input and |
| ; rev64 + ext #8 on each result. |
| define <64 x i8> @broadcast_u64_to_v64i8_zext(i64 %x) { |
| ; CHECK-LE-LABEL: broadcast_u64_to_v64i8_zext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: adrp x8, .LCPI10_0 |
| ; CHECK-LE-NEXT: adrp x9, .LCPI10_2 |
| ; CHECK-LE-NEXT: fmov d0, x0 |
| ; CHECK-LE-NEXT: adrp x10, .LCPI10_3 |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI10_4 |
| ; CHECK-LE-NEXT: ldr q2, [x9, :lo12:.LCPI10_2] |
| ; CHECK-LE-NEXT: ldr q3, [x10, :lo12:.LCPI10_3] |
| ; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI10_4] |
| ; CHECK-LE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-LE-NEXT: adrp x8, .LCPI10_1 |
| ; CHECK-LE-NEXT: tbl v2.16b, { v0.16b }, v2.16b |
| ; CHECK-LE-NEXT: tbl v3.16b, { v0.16b }, v3.16b |
| ; CHECK-LE-NEXT: tbl v0.16b, { v0.16b }, v4.16b |
| ; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI10_1] |
| ; CHECK-LE-NEXT: cmtst v1.16b, v1.16b, v4.16b |
| ; CHECK-LE-NEXT: cmtst v2.16b, v2.16b, v4.16b |
| ; CHECK-LE-NEXT: cmtst v3.16b, v3.16b, v4.16b |
| ; CHECK-LE-NEXT: cmtst v4.16b, v0.16b, v4.16b |
| ; CHECK-LE-NEXT: ushr v0.16b, v1.16b, #7 |
| ; CHECK-LE-NEXT: ushr v1.16b, v2.16b, #7 |
| ; CHECK-LE-NEXT: ushr v2.16b, v3.16b, #7 |
| ; CHECK-LE-NEXT: ushr v3.16b, v4.16b, #7 |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u64_to_v64i8_zext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: fmov d0, x0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI10_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_0 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI10_4 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_4 |
| ; CHECK-BE-NEXT: adrp x9, .LCPI10_2 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI10_2 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: adrp x8, .LCPI10_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_3 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI10_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_1 |
| ; CHECK-BE-NEXT: ld1 { v5.16b }, [x8] |
| ; CHECK-BE-NEXT: tbl v3.16b, { v0.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v2.16b, { v0.16b }, v2.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v4.16b |
| ; CHECK-BE-NEXT: cmtst v3.16b, v3.16b, v5.16b |
| ; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v5.16b |
| ; CHECK-BE-NEXT: cmtst v2.16b, v2.16b, v5.16b |
| ; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v5.16b |
| ; CHECK-BE-NEXT: ushr v3.16b, v3.16b, #7 |
| ; CHECK-BE-NEXT: ushr v1.16b, v1.16b, #7 |
| ; CHECK-BE-NEXT: ushr v2.16b, v2.16b, #7 |
| ; CHECK-BE-NEXT: ushr v0.16b, v0.16b, #7 |
| ; CHECK-BE-NEXT: rev64 v3.16b, v3.16b |
| ; CHECK-BE-NEXT: rev64 v2.16b, v2.16b |
| ; CHECK-BE-NEXT: rev64 v5.16b, v1.16b |
| ; CHECK-BE-NEXT: rev64 v4.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v0.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v5.16b, v5.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v4.16b, v4.16b, #8 |
| ; CHECK-BE-NEXT: ret |
| ; Reinterpret the scalar as 64 one-bit lanes, then widen each lane to a byte (0 or 1). |
| %v1 = bitcast i64 %x to <64 x i1> |
| %v8 = zext <64 x i1> %v1 to <64 x i8> |
| ret <64 x i8> %v8 |
| } |
| |
| ; Expands each of the 64 bits of %x into its own byte lane holding 0 or -1 |
| ; (bitcast to <64 x i1>, then sext to <64 x i8>). |
| ; The result occupies four 128-bit registers (v0-v3). For each one the |
| ; assertions below expect a tbl with its own index constant and a cmtst |
| ; against one shared constant-pool vector, with no trailing shift. |
| ; The big-endian assertions additionally expect rev64 on the input and |
| ; rev64 + ext #8 on each result. |
| define <64 x i8> @broadcast_u64_to_v64i8_sext(i64 %x) { |
| ; CHECK-LE-LABEL: broadcast_u64_to_v64i8_sext: |
| ; CHECK-LE: // %bb.0: |
| ; CHECK-LE-NEXT: adrp x8, .LCPI11_0 |
| ; CHECK-LE-NEXT: adrp x9, .LCPI11_2 |
| ; CHECK-LE-NEXT: fmov d0, x0 |
| ; CHECK-LE-NEXT: adrp x10, .LCPI11_3 |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI11_4 |
| ; CHECK-LE-NEXT: ldr q2, [x9, :lo12:.LCPI11_2] |
| ; CHECK-LE-NEXT: ldr q3, [x10, :lo12:.LCPI11_3] |
| ; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI11_4] |
| ; CHECK-LE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-LE-NEXT: adrp x8, .LCPI11_1 |
| ; CHECK-LE-NEXT: tbl v2.16b, { v0.16b }, v2.16b |
| ; CHECK-LE-NEXT: tbl v3.16b, { v0.16b }, v3.16b |
| ; CHECK-LE-NEXT: tbl v4.16b, { v0.16b }, v4.16b |
| ; CHECK-LE-NEXT: ldr q5, [x8, :lo12:.LCPI11_1] |
| ; CHECK-LE-NEXT: cmtst v0.16b, v1.16b, v5.16b |
| ; CHECK-LE-NEXT: cmtst v1.16b, v2.16b, v5.16b |
| ; CHECK-LE-NEXT: cmtst v2.16b, v3.16b, v5.16b |
| ; CHECK-LE-NEXT: cmtst v3.16b, v4.16b, v5.16b |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: broadcast_u64_to_v64i8_sext: |
| ; CHECK-BE: // %bb.0: |
| ; CHECK-BE-NEXT: fmov d0, x0 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI11_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_0 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI11_4 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_4 |
| ; CHECK-BE-NEXT: adrp x9, .LCPI11_2 |
| ; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI11_2 |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: rev64 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: adrp x8, .LCPI11_3 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_3 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI11_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_1 |
| ; CHECK-BE-NEXT: ld1 { v5.16b }, [x8] |
| ; CHECK-BE-NEXT: tbl v3.16b, { v0.16b }, v3.16b |
| ; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v2.16b, { v0.16b }, v2.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v4.16b |
| ; CHECK-BE-NEXT: cmtst v3.16b, v3.16b, v5.16b |
| ; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v5.16b |
| ; CHECK-BE-NEXT: cmtst v2.16b, v2.16b, v5.16b |
| ; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v5.16b |
| ; CHECK-BE-NEXT: rev64 v3.16b, v3.16b |
| ; CHECK-BE-NEXT: rev64 v2.16b, v2.16b |
| ; CHECK-BE-NEXT: rev64 v5.16b, v1.16b |
| ; CHECK-BE-NEXT: rev64 v4.16b, v0.16b |
| ; CHECK-BE-NEXT: ext v0.16b, v3.16b, v3.16b, #8 |
| ; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 |
| ; CHECK-BE-NEXT: ext v3.16b, v5.16b, v5.16b, #8 |
| ; CHECK-BE-NEXT: ext v1.16b, v4.16b, v4.16b, #8 |
| ; CHECK-BE-NEXT: ret |
| ; Reinterpret the scalar as 64 one-bit lanes, then sign-extend each lane to a byte (0 or -1). |
| %v1 = bitcast i64 %x to <64 x i1> |
| %v8 = sext <64 x i1> %v1 to <64 x i8> |
| ret <64 x i8> %v8 |
| } |
| |
| ; Lane-wise select between two <8 x i8> vectors loaded from %if_true and |
| ; %if_false, controlled by the 8 bits of %mask (bitcast to <8 x i1>); the |
| ; result is stored to %out. |
| ; The assertions below expect the mask to be materialised with dup, and-ed |
| ; with a constant-pool vector and compared with cmeq #0, followed by bsl. |
| ; cmeq #0 is set where the mask bit is clear, so bsl is expected with the |
| ; data loaded from x3 (%if_false) as its first source operand. |
| define void @if_then_else8_i8(ptr %out, i8 %mask, ptr %if_true, ptr %if_false) { |
| ; CHECK-LE-LABEL: if_then_else8_i8: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: dup v0.8b, w1 |
| ; CHECK-LE-NEXT: adrp x8, .LCPI12_0 |
| ; CHECK-LE-NEXT: ldr d2, [x3] |
| ; CHECK-LE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0] |
| ; CHECK-LE-NEXT: and v0.8b, v0.8b, v1.8b |
| ; CHECK-LE-NEXT: ldr d1, [x2] |
| ; CHECK-LE-NEXT: cmeq v0.8b, v0.8b, #0 |
| ; CHECK-LE-NEXT: bsl v0.8b, v2.8b, v1.8b |
| ; CHECK-LE-NEXT: str d0, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else8_i8: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: dup v0.8b, w1 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI12_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI12_0 |
| ; CHECK-BE-NEXT: ld1 { v1.8b }, [x8] |
| ; CHECK-BE-NEXT: ld1 { v2.8b }, [x3] |
| ; CHECK-BE-NEXT: and v0.8b, v0.8b, v1.8b |
| ; CHECK-BE-NEXT: ld1 { v1.8b }, [x2] |
| ; CHECK-BE-NEXT: cmeq v0.8b, v0.8b, #0 |
| ; CHECK-BE-NEXT: bsl v0.8b, v2.8b, v1.8b |
| ; CHECK-BE-NEXT: st1 { v0.8b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| start: |
| ; Both inputs and the output use align 4, which is below the 8-byte vector size. |
| %t = load <8 x i8>, ptr %if_true, align 4 |
| %f = load <8 x i8>, ptr %if_false, align 4 |
| ; One select bit per lane, taken from the scalar mask. |
| %m = bitcast i8 %mask to <8 x i1> |
| %s = select <8 x i1> %m, <8 x i8> %t, <8 x i8> %f |
| store <8 x i8> %s, ptr %out, align 4 |
| ret void |
| } |
| |
| ; Lane-wise select between two <16 x i16> vectors loaded from %if_true and |
| ; %if_false, controlled by the 16 bits of %mask (bitcast to <16 x i1>); the |
| ; result is stored to %out. |
| ; The 32-byte vectors are handled as two 128-bit halves. The assertions below |
| ; expect a single dup of the mask into .8h lanes, an and with a different |
| ; constant-pool vector per half, cmeq #0, and bsl. |
| ; cmeq #0 is set where the mask bit is clear, so bsl is expected with the |
| ; data loaded via x3 (%if_false) as its first source operand. |
| define void @if_then_else16_i16(ptr %out, i16 %mask, ptr %if_true, ptr %if_false) { |
| ; CHECK-LE-LABEL: if_then_else16_i16: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: adrp x8, .LCPI13_1 |
| ; CHECK-LE-NEXT: dup v0.8h, w1 |
| ; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI13_1] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI13_0 |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] |
| ; CHECK-LE-NEXT: ldp q4, q3, [x2] |
| ; CHECK-LE-NEXT: and v1.16b, v0.16b, v1.16b |
| ; CHECK-LE-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-LE-NEXT: ldp q5, q2, [x3] |
| ; CHECK-LE-NEXT: cmeq v1.8h, v1.8h, #0 |
| ; CHECK-LE-NEXT: cmeq v0.8h, v0.8h, #0 |
| ; CHECK-LE-NEXT: bsl v1.16b, v2.16b, v3.16b |
| ; CHECK-LE-NEXT: bsl v0.16b, v5.16b, v4.16b |
| ; CHECK-LE-NEXT: stp q0, q1, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else16_i16: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: adrp x8, .LCPI13_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI13_1 |
| ; CHECK-BE-NEXT: dup v0.8h, w1 |
| ; CHECK-BE-NEXT: ld1 { v1.8h }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI13_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI13_0 |
| ; CHECK-BE-NEXT: ld1 { v2.8h }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #16 |
| ; CHECK-BE-NEXT: add x9, x3, #16 |
| ; CHECK-BE-NEXT: ld1 { v3.8h }, [x9] |
| ; CHECK-BE-NEXT: ld1 { v4.8h }, [x2] |
| ; CHECK-BE-NEXT: ld1 { v5.8h }, [x3] |
| ; CHECK-BE-NEXT: and v1.16b, v0.16b, v1.16b |
| ; CHECK-BE-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v2.8h }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: cmeq v1.8h, v1.8h, #0 |
| ; CHECK-BE-NEXT: cmeq v0.8h, v0.8h, #0 |
| ; CHECK-BE-NEXT: bsl v1.16b, v3.16b, v2.16b |
| ; CHECK-BE-NEXT: bsl v0.16b, v5.16b, v4.16b |
| ; CHECK-BE-NEXT: st1 { v1.8h }, [x8] |
| ; CHECK-BE-NEXT: st1 { v0.8h }, [x0] |
| ; CHECK-BE-NEXT: ret |
| start: |
| ; Both inputs and the output use align 4, which is below the 32-byte vector size. |
| %t = load <16 x i16>, ptr %if_true, align 4 |
| %f = load <16 x i16>, ptr %if_false, align 4 |
| ; One select bit per lane, taken from the scalar mask. |
| %m = bitcast i16 %mask to <16 x i1> |
| %s = select <16 x i1> %m, <16 x i16> %t, <16 x i16> %f |
| store <16 x i16> %s, ptr %out, align 4 |
| ret void |
| } |
| |
| ; Lane-wise select between two <32 x i8> vectors loaded from %if_true and |
| ; %if_false, controlled by the 32 bits of %mask (bitcast to <32 x i1>); the |
| ; result is stored to %out. |
| ; The 32-byte vectors are handled as two 128-bit halves. The assertions below |
| ; expect the mask to be moved into a vector register with fmov, passed |
| ; through tbl with a different index constant per half, and-ed with one |
| ; shared constant-pool vector, compared with cmeq #0, and fed to bsl. |
| ; The big-endian assertions additionally expect rev32 on the mask before tbl. |
| ; cmeq #0 is set where the mask bit is clear, so bsl is expected with the |
| ; data loaded via x3 (%if_false) as its first source operand. |
| define void @if_then_else32_i8(ptr %out, i32 %mask, ptr %if_true, ptr %if_false) { |
| ; CHECK-LE-LABEL: if_then_else32_i8: |
| ; CHECK-LE: // %bb.0: // %start |
| ; CHECK-LE-NEXT: adrp x8, .LCPI14_2 |
| ; CHECK-LE-NEXT: fmov s1, w1 |
| ; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI14_2] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI14_0 |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] |
| ; CHECK-LE-NEXT: adrp x8, .LCPI14_1 |
| ; CHECK-LE-NEXT: tbl v0.16b, { v1.16b }, v0.16b |
| ; CHECK-LE-NEXT: ldp q4, q3, [x2] |
| ; CHECK-LE-NEXT: tbl v1.16b, { v1.16b }, v2.16b |
| ; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI14_1] |
| ; CHECK-LE-NEXT: and v0.16b, v0.16b, v2.16b |
| ; CHECK-LE-NEXT: and v1.16b, v1.16b, v2.16b |
| ; CHECK-LE-NEXT: ldp q5, q2, [x3] |
| ; CHECK-LE-NEXT: cmeq v0.16b, v0.16b, #0 |
| ; CHECK-LE-NEXT: cmeq v1.16b, v1.16b, #0 |
| ; CHECK-LE-NEXT: bsl v0.16b, v2.16b, v3.16b |
| ; CHECK-LE-NEXT: bsl v1.16b, v5.16b, v4.16b |
| ; CHECK-LE-NEXT: stp q1, q0, [x0] |
| ; CHECK-LE-NEXT: ret |
| ; |
| ; CHECK-BE-LABEL: if_then_else32_i8: |
| ; CHECK-BE: // %bb.0: // %start |
| ; CHECK-BE-NEXT: fmov s0, w1 |
| ; CHECK-BE-NEXT: adrp x8, .LCPI14_2 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI14_2 |
| ; CHECK-BE-NEXT: ld1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI14_0 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI14_0 |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: adrp x8, .LCPI14_1 |
| ; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI14_1 |
| ; CHECK-BE-NEXT: rev32 v0.16b, v0.16b |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x8] |
| ; CHECK-BE-NEXT: add x8, x2, #16 |
| ; CHECK-BE-NEXT: add x9, x3, #16 |
| ; CHECK-BE-NEXT: ld1 { v4.16b }, [x2] |
| ; CHECK-BE-NEXT: ld1 { v5.16b }, [x3] |
| ; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b |
| ; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v2.16b |
| ; CHECK-BE-NEXT: ld1 { v2.16b }, [x8] |
| ; CHECK-BE-NEXT: add x8, x0, #16 |
| ; CHECK-BE-NEXT: and v1.16b, v1.16b, v3.16b |
| ; CHECK-BE-NEXT: and v0.16b, v0.16b, v3.16b |
| ; CHECK-BE-NEXT: ld1 { v3.16b }, [x9] |
| ; CHECK-BE-NEXT: cmeq v1.16b, v1.16b, #0 |
| ; CHECK-BE-NEXT: cmeq v0.16b, v0.16b, #0 |
| ; CHECK-BE-NEXT: bsl v1.16b, v3.16b, v2.16b |
| ; CHECK-BE-NEXT: bsl v0.16b, v5.16b, v4.16b |
| ; CHECK-BE-NEXT: st1 { v1.16b }, [x8] |
| ; CHECK-BE-NEXT: st1 { v0.16b }, [x0] |
| ; CHECK-BE-NEXT: ret |
| start: |
| ; Both inputs and the output use align 4, which is below the 32-byte vector size. |
| %t = load <32 x i8>, ptr %if_true, align 4 |
| %f = load <32 x i8>, ptr %if_false, align 4 |
| ; One select bit per lane, taken from the scalar mask. |
| %m = bitcast i32 %mask to <32 x i1> |
| %s = select <32 x i1> %m, <32 x i8> %t, <32 x i8> %f |
| store <32 x i8> %s, ptr %out, align 4 |
| ret void |
| } |
| |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; CHECK: {{.*}} |