; blob: 8a79182182ab4f1c36027bad882369c480adf290
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-elf %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -mtriple=aarch64_be-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; Lane-wise select driven by an 8-bit scalar mask: %mask is bitcast to
; <8 x i1> and chooses between the <8 x i32> vectors loaded from %if_true and
; %if_false; the selected vector is stored to %out. Expected assembly is
; listed separately for the little-endian and the big-endian run line.
define void @if_then_else8(ptr %out, i8 %mask, ptr %if_true, ptr %if_false) {
; CHECK-LE-LABEL: if_then_else8:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: adrp x8, .LCPI0_1
; CHECK-LE-NEXT: dup v0.4s, w1
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI0_1]
; CHECK-LE-NEXT: adrp x8, .LCPI0_0
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
; CHECK-LE-NEXT: ldp q4, q3, [x2]
; CHECK-LE-NEXT: and v1.16b, v0.16b, v1.16b
; CHECK-LE-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-LE-NEXT: ldp q5, q2, [x3]
; CHECK-LE-NEXT: cmeq v1.4s, v1.4s, #0
; CHECK-LE-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-LE-NEXT: bsl v1.16b, v2.16b, v3.16b
; CHECK-LE-NEXT: bsl v0.16b, v5.16b, v4.16b
; CHECK-LE-NEXT: stp q0, q1, [x0]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else8:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: adrp x8, .LCPI0_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_1
; CHECK-BE-NEXT: dup v0.4s, w1
; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI0_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0
; CHECK-BE-NEXT: ld1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #16
; CHECK-BE-NEXT: add x9, x3, #16
; CHECK-BE-NEXT: ld1 { v3.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v4.4s }, [x2]
; CHECK-BE-NEXT: ld1 { v5.4s }, [x3]
; CHECK-BE-NEXT: and v1.16b, v0.16b, v1.16b
; CHECK-BE-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: ld1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: cmeq v1.4s, v1.4s, #0
; CHECK-BE-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-BE-NEXT: bsl v1.16b, v3.16b, v2.16b
; CHECK-BE-NEXT: bsl v0.16b, v5.16b, v4.16b
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: st1 { v0.4s }, [x0]
; CHECK-BE-NEXT: ret
; The entry label name "start" appears in the expected "// %start" comment
; above, so it must not be renamed without regenerating the assertions.
start:
%t = load <8 x i32>, ptr %if_true, align 4
%f = load <8 x i32>, ptr %if_false, align 4
; Reinterpret the 8 scalar mask bits as 8 one-bit lane predicates.
%m = bitcast i8 %mask to <8 x i1>
%s = select <8 x i1> %m, <8 x i32> %t, <8 x i32> %f
store <8 x i32> %s, ptr %out, align 4
ret void
}
; 16-lane variant of the mask-driven select: i16 %mask is bitcast to
; <16 x i1> and chooses between the <16 x i32> vectors loaded from %if_true
; and %if_false; the selected vector is stored to %out.
define void @if_then_else16(ptr %out, i16 %mask, ptr %if_true, ptr %if_false) {
; CHECK-LE-LABEL: if_then_else16:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: dup v0.4s, w1
; CHECK-LE-NEXT: adrp x8, .LCPI1_3
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
; CHECK-LE-NEXT: adrp x8, .LCPI1_2
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
; CHECK-LE-NEXT: adrp x8, .LCPI1_1
; CHECK-LE-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
; CHECK-LE-NEXT: adrp x8, .LCPI1_0
; CHECK-LE-NEXT: and v1.16b, v0.16b, v1.16b
; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI1_0]
; CHECK-LE-NEXT: and v2.16b, v0.16b, v2.16b
; CHECK-LE-NEXT: and v3.16b, v0.16b, v3.16b
; CHECK-LE-NEXT: ldp q6, q7, [x3, #32]
; CHECK-LE-NEXT: and v0.16b, v0.16b, v4.16b
; CHECK-LE-NEXT: cmeq v1.4s, v1.4s, #0
; CHECK-LE-NEXT: ldp q4, q5, [x2, #32]
; CHECK-LE-NEXT: cmeq v2.4s, v2.4s, #0
; CHECK-LE-NEXT: ldp q16, q17, [x3]
; CHECK-LE-NEXT: cmeq v3.4s, v3.4s, #0
; CHECK-LE-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-LE-NEXT: bsl v1.16b, v6.16b, v4.16b
; CHECK-LE-NEXT: ldp q4, q6, [x2]
; CHECK-LE-NEXT: bsl v2.16b, v7.16b, v5.16b
; CHECK-LE-NEXT: bsl v3.16b, v16.16b, v4.16b
; CHECK-LE-NEXT: bsl v0.16b, v17.16b, v6.16b
; CHECK-LE-NEXT: stp q1, q2, [x0, #32]
; CHECK-LE-NEXT: stp q3, q0, [x0]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else16:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: adrp x9, .LCPI1_3
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI1_3
; CHECK-BE-NEXT: add x8, x2, #32
; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI1_2
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI1_2
; CHECK-BE-NEXT: ld1 { v3.4s }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI1_1
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI1_1
; CHECK-BE-NEXT: dup v0.4s, w1
; CHECK-BE-NEXT: ld1 { v5.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #48
; CHECK-BE-NEXT: add x9, x3, #48
; CHECK-BE-NEXT: ld1 { v7.4s }, [x2]
; CHECK-BE-NEXT: ld1 { v4.4s }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI1_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI1_0
; CHECK-BE-NEXT: and v2.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: ld1 { v6.4s }, [x8]
; CHECK-BE-NEXT: and v3.16b, v0.16b, v3.16b
; CHECK-BE-NEXT: and v5.16b, v0.16b, v5.16b
; CHECK-BE-NEXT: add x8, x3, #32
; CHECK-BE-NEXT: ld1 { v17.4s }, [x3]
; CHECK-BE-NEXT: ld1 { v16.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #16
; CHECK-BE-NEXT: cmeq v2.4s, v2.4s, #0
; CHECK-BE-NEXT: and v0.16b, v0.16b, v6.16b
; CHECK-BE-NEXT: ld1 { v6.4s }, [x9]
; CHECK-BE-NEXT: cmeq v3.4s, v3.4s, #0
; CHECK-BE-NEXT: cmeq v5.4s, v5.4s, #0
; CHECK-BE-NEXT: add x9, x3, #16
; CHECK-BE-NEXT: bit v1.16b, v16.16b, v2.16b
; CHECK-BE-NEXT: ld1 { v2.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v16.4s }, [x9]
; CHECK-BE-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-BE-NEXT: bsl v3.16b, v6.16b, v4.16b
; CHECK-BE-NEXT: mov v4.16b, v5.16b
; CHECK-BE-NEXT: add x8, x0, #32
; CHECK-BE-NEXT: bsl v4.16b, v17.16b, v7.16b
; CHECK-BE-NEXT: bsl v0.16b, v16.16b, v2.16b
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #48
; CHECK-BE-NEXT: st1 { v3.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: st1 { v4.4s }, [x0]
; CHECK-BE-NEXT: st1 { v0.4s }, [x8]
; CHECK-BE-NEXT: ret
; The entry label name "start" appears in the expected "// %start" comment
; above, so it must not be renamed without regenerating the assertions.
start:
%t = load <16 x i32>, ptr %if_true, align 4
%f = load <16 x i32>, ptr %if_false, align 4
; Reinterpret the 16 scalar mask bits as 16 one-bit lane predicates.
%m = bitcast i16 %mask to <16 x i1>
%s = select <16 x i1> %m, <16 x i32> %t, <16 x i32> %f
store <16 x i32> %s, ptr %out, align 4
ret void
}
; 32-lane variant of the mask-driven select: i32 %mask is bitcast to
; <32 x i1> and chooses between the <32 x i32> vectors loaded from %if_true
; and %if_false; the selected vector is stored to %out.
define void @if_then_else32(ptr %out, i32 %mask, ptr %if_true, ptr %if_false) {
; CHECK-LE-LABEL: if_then_else32:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: adrp x8, .LCPI2_0
; CHECK-LE-NEXT: dup v7.4s, w1
; CHECK-LE-NEXT: ldr q5, [x8, :lo12:.LCPI2_0]
; CHECK-LE-NEXT: adrp x8, .LCPI2_1
; CHECK-LE-NEXT: ldr q16, [x8, :lo12:.LCPI2_1]
; CHECK-LE-NEXT: adrp x8, .LCPI2_6
; CHECK-LE-NEXT: ldr q19, [x8, :lo12:.LCPI2_6]
; CHECK-LE-NEXT: adrp x8, .LCPI2_7
; CHECK-LE-NEXT: and v5.16b, v7.16b, v5.16b
; CHECK-LE-NEXT: ldr q20, [x8, :lo12:.LCPI2_7]
; CHECK-LE-NEXT: adrp x8, .LCPI2_2
; CHECK-LE-NEXT: and v16.16b, v7.16b, v16.16b
; CHECK-LE-NEXT: ldr q21, [x8, :lo12:.LCPI2_2]
; CHECK-LE-NEXT: adrp x8, .LCPI2_5
; CHECK-LE-NEXT: and v19.16b, v7.16b, v19.16b
; CHECK-LE-NEXT: ldr q22, [x8, :lo12:.LCPI2_5]
; CHECK-LE-NEXT: adrp x8, .LCPI2_4
; CHECK-LE-NEXT: and v20.16b, v7.16b, v20.16b
; CHECK-LE-NEXT: ldr q23, [x8, :lo12:.LCPI2_4]
; CHECK-LE-NEXT: ldp q4, q6, [x2, #96]
; CHECK-LE-NEXT: ldp q17, q18, [x3, #96]
; CHECK-LE-NEXT: and v22.16b, v7.16b, v22.16b
; CHECK-LE-NEXT: cmeq v19.4s, v19.4s, #0
; CHECK-LE-NEXT: and v23.16b, v7.16b, v23.16b
; CHECK-LE-NEXT: cmeq v20.4s, v20.4s, #0
; CHECK-LE-NEXT: ldp q3, q2, [x2, #64]
; CHECK-LE-NEXT: adrp x8, .LCPI2_3
; CHECK-LE-NEXT: cmeq v22.4s, v22.4s, #0
; CHECK-LE-NEXT: ldr q24, [x8, :lo12:.LCPI2_3]
; CHECK-LE-NEXT: and v21.16b, v7.16b, v21.16b
; CHECK-LE-NEXT: bit v6.16b, v18.16b, v19.16b
; CHECK-LE-NEXT: ldp q18, q19, [x3, #64]
; CHECK-LE-NEXT: cmeq v23.4s, v23.4s, #0
; CHECK-LE-NEXT: bit v4.16b, v17.16b, v20.16b
; CHECK-LE-NEXT: and v7.16b, v7.16b, v24.16b
; CHECK-LE-NEXT: cmeq v16.4s, v16.4s, #0
; CHECK-LE-NEXT: cmeq v5.4s, v5.4s, #0
; CHECK-LE-NEXT: cmeq v21.4s, v21.4s, #0
; CHECK-LE-NEXT: bit v3.16b, v18.16b, v22.16b
; CHECK-LE-NEXT: ldp q1, q0, [x2, #32]
; CHECK-LE-NEXT: bit v2.16b, v19.16b, v23.16b
; CHECK-LE-NEXT: ldp q24, q25, [x2]
; CHECK-LE-NEXT: ldp q17, q20, [x3, #32]
; CHECK-LE-NEXT: cmeq v7.4s, v7.4s, #0
; CHECK-LE-NEXT: ldp q18, q22, [x3]
; CHECK-LE-NEXT: stp q4, q6, [x0, #96]
; CHECK-LE-NEXT: mov v4.16b, v16.16b
; CHECK-LE-NEXT: stp q3, q2, [x0, #64]
; CHECK-LE-NEXT: mov v3.16b, v5.16b
; CHECK-LE-NEXT: bit v1.16b, v17.16b, v7.16b
; CHECK-LE-NEXT: bit v0.16b, v20.16b, v21.16b
; CHECK-LE-NEXT: bsl v4.16b, v18.16b, v24.16b
; CHECK-LE-NEXT: bsl v3.16b, v22.16b, v25.16b
; CHECK-LE-NEXT: stp q1, q0, [x0, #32]
; CHECK-LE-NEXT: stp q4, q3, [x0]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else32:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: add x8, x2, #96
; CHECK-BE-NEXT: dup v19.4s, w1
; CHECK-BE-NEXT: add x9, x2, #112
; CHECK-BE-NEXT: ld1 { v5.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #80
; CHECK-BE-NEXT: ld1 { v6.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #32
; CHECK-BE-NEXT: add x9, x2, #48
; CHECK-BE-NEXT: ld1 { v4.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #16
; CHECK-BE-NEXT: adrp x12, .LCPI2_7
; CHECK-BE-NEXT: add x12, x12, :lo12:.LCPI2_7
; CHECK-BE-NEXT: ld1 { v0.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #96
; CHECK-BE-NEXT: ld1 { v7.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #112
; CHECK-BE-NEXT: add x10, x2, #64
; CHECK-BE-NEXT: ld1 { v17.4s }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI2_5
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_5
; CHECK-BE-NEXT: ld1 { v20.4s }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI2_6
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_6
; CHECK-BE-NEXT: ld1 { v21.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
; CHECK-BE-NEXT: add x9, x3, #64
; CHECK-BE-NEXT: ld1 { v22.4s }, [x12]
; CHECK-BE-NEXT: ld1 { v16.4s }, [x10]
; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
; CHECK-BE-NEXT: and v20.16b, v19.16b, v20.16b
; CHECK-BE-NEXT: add x8, x3, #80
; CHECK-BE-NEXT: adrp x12, .LCPI2_4
; CHECK-BE-NEXT: add x12, x12, :lo12:.LCPI2_4
; CHECK-BE-NEXT: and v21.16b, v19.16b, v21.16b
; CHECK-BE-NEXT: ld1 { v23.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v24.4s }, [x12]
; CHECK-BE-NEXT: adrp x8, .LCPI2_3
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_3
; CHECK-BE-NEXT: cmeq v20.4s, v20.4s, #0
; CHECK-BE-NEXT: and v22.16b, v19.16b, v22.16b
; CHECK-BE-NEXT: adrp x11, .LCPI2_2
; CHECK-BE-NEXT: add x11, x11, :lo12:.LCPI2_2
; CHECK-BE-NEXT: cmeq v21.4s, v21.4s, #0
; CHECK-BE-NEXT: adrp x9, .LCPI2_0
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI2_0
; CHECK-BE-NEXT: adrp x10, .LCPI2_1
; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI2_1
; CHECK-BE-NEXT: bit v16.16b, v18.16b, v20.16b
; CHECK-BE-NEXT: ld1 { v18.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v25.4s }, [x9]
; CHECK-BE-NEXT: bit v6.16b, v17.16b, v21.16b
; CHECK-BE-NEXT: ld1 { v17.4s }, [x11]
; CHECK-BE-NEXT: ld1 { v20.4s }, [x10]
; CHECK-BE-NEXT: and v21.16b, v19.16b, v24.16b
; CHECK-BE-NEXT: cmeq v22.4s, v22.4s, #0
; CHECK-BE-NEXT: add x8, x3, #32
; CHECK-BE-NEXT: and v18.16b, v19.16b, v18.16b
; CHECK-BE-NEXT: and v25.16b, v19.16b, v25.16b
; CHECK-BE-NEXT: ld1 { v24.4s }, [x8]
; CHECK-BE-NEXT: and v17.16b, v19.16b, v17.16b
; CHECK-BE-NEXT: and v20.16b, v19.16b, v20.16b
; CHECK-BE-NEXT: add x8, x3, #48
; CHECK-BE-NEXT: cmeq v21.4s, v21.4s, #0
; CHECK-BE-NEXT: bit v5.16b, v7.16b, v22.16b
; CHECK-BE-NEXT: ld1 { v19.4s }, [x8]
; CHECK-BE-NEXT: cmeq v18.4s, v18.4s, #0
; CHECK-BE-NEXT: add x8, x3, #16
; CHECK-BE-NEXT: ld1 { v3.4s }, [x2]
; CHECK-BE-NEXT: cmeq v17.4s, v17.4s, #0
; CHECK-BE-NEXT: ld1 { v7.4s }, [x3]
; CHECK-BE-NEXT: ld1 { v22.4s }, [x8]
; CHECK-BE-NEXT: cmeq v25.4s, v25.4s, #0
; CHECK-BE-NEXT: cmeq v20.4s, v20.4s, #0
; CHECK-BE-NEXT: bit v1.16b, v23.16b, v21.16b
; CHECK-BE-NEXT: add x8, x0, #96
; CHECK-BE-NEXT: bit v4.16b, v24.16b, v18.16b
; CHECK-BE-NEXT: st1 { v5.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #112
; CHECK-BE-NEXT: bit v2.16b, v19.16b, v17.16b
; CHECK-BE-NEXT: st1 { v6.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #64
; CHECK-BE-NEXT: bit v3.16b, v7.16b, v20.16b
; CHECK-BE-NEXT: st1 { v16.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #80
; CHECK-BE-NEXT: bit v0.16b, v22.16b, v25.16b
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #32
; CHECK-BE-NEXT: st1 { v4.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #48
; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: st1 { v3.4s }, [x0]
; CHECK-BE-NEXT: st1 { v0.4s }, [x8]
; CHECK-BE-NEXT: ret
; The entry label name "start" appears in the expected "// %start" comment
; above, so it must not be renamed without regenerating the assertions.
start:
%t = load <32 x i32>, ptr %if_true, align 4
%f = load <32 x i32>, ptr %if_false, align 4
; Reinterpret the 32 scalar mask bits as 32 one-bit lane predicates.
%m = bitcast i32 %mask to <32 x i1>
%s = select <32 x i1> %m, <32 x i32> %t, <32 x i32> %f
store <32 x i32> %s, ptr %out, align 4
ret void
}
; 64-lane variant of the mask-driven select: i64 %mask is bitcast to
; <64 x i1> and chooses between the <64 x i32> vectors loaded from %if_true
; and %if_false; the selected vector is stored to %out.
; Unlike the narrower variants this function carries the nounwind attribute,
; and its expected assembly spills and reloads d8-d15 on the stack.
; NOTE(review): presumably nounwind is there to keep unwind (CFI) directives
; out of the expected output for those spills - confirm.
define void @if_then_else64(ptr %out, i64 %mask, ptr %if_true, ptr %if_false) nounwind {
; CHECK-LE-LABEL: if_then_else64:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: sub sp, sp, #80
; CHECK-LE-NEXT: fmov d21, x1
; CHECK-LE-NEXT: adrp x8, .LCPI3_6
; CHECK-LE-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-LE-NEXT: ldr q26, [x8, :lo12:.LCPI3_6]
; CHECK-LE-NEXT: adrp x8, .LCPI3_7
; CHECK-LE-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-LE-NEXT: ldr q27, [x8, :lo12:.LCPI3_7]
; CHECK-LE-NEXT: adrp x8, .LCPI3_0
; CHECK-LE-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-LE-NEXT: dup v5.4s, v21.s[0]
; CHECK-LE-NEXT: dup v28.4s, v21.s[1]
; CHECK-LE-NEXT: ldr q22, [x8, :lo12:.LCPI3_0]
; CHECK-LE-NEXT: ldp q25, q24, [x2, #96]
; CHECK-LE-NEXT: adrp x8, .LCPI3_3
; CHECK-LE-NEXT: ldp q21, q31, [x3, #96]
; CHECK-LE-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-LE-NEXT: and v23.16b, v5.16b, v26.16b
; CHECK-LE-NEXT: and v9.16b, v5.16b, v27.16b
; CHECK-LE-NEXT: and v10.16b, v28.16b, v22.16b
; CHECK-LE-NEXT: ldp q16, q0, [x2, #128]
; CHECK-LE-NEXT: and v27.16b, v28.16b, v27.16b
; CHECK-LE-NEXT: ldp q1, q7, [x2, #160]
; CHECK-LE-NEXT: and v26.16b, v28.16b, v26.16b
; CHECK-LE-NEXT: cmeq v8.4s, v23.4s, #0
; CHECK-LE-NEXT: ldr q23, [x8, :lo12:.LCPI3_3]
; CHECK-LE-NEXT: adrp x8, .LCPI3_4
; CHECK-LE-NEXT: ldr q13, [x8, :lo12:.LCPI3_4]
; CHECK-LE-NEXT: adrp x8, .LCPI3_5
; CHECK-LE-NEXT: cmeq v27.4s, v27.4s, #0
; CHECK-LE-NEXT: ldp q6, q2, [x2, #192]
; CHECK-LE-NEXT: cmeq v26.4s, v26.4s, #0
; CHECK-LE-NEXT: bif v21.16b, v25.16b, v8.16b
; CHECK-LE-NEXT: cmeq v25.4s, v9.4s, #0
; CHECK-LE-NEXT: cmeq v8.4s, v10.4s, #0
; CHECK-LE-NEXT: and v9.16b, v28.16b, v23.16b
; CHECK-LE-NEXT: ldp q14, q10, [x3, #128]
; CHECK-LE-NEXT: and v15.16b, v28.16b, v13.16b
; CHECK-LE-NEXT: ldp q12, q11, [x3, #192]
; CHECK-LE-NEXT: bit v24.16b, v31.16b, v25.16b
; CHECK-LE-NEXT: ldr q25, [x8, :lo12:.LCPI3_5]
; CHECK-LE-NEXT: adrp x8, .LCPI3_2
; CHECK-LE-NEXT: bit v16.16b, v14.16b, v8.16b
; CHECK-LE-NEXT: cmeq v31.4s, v9.4s, #0
; CHECK-LE-NEXT: str q0, [sp] // 16-byte Spill
; CHECK-LE-NEXT: ldp q9, q8, [x3, #160]
; CHECK-LE-NEXT: cmeq v14.4s, v15.4s, #0
; CHECK-LE-NEXT: ldp q3, q4, [x2, #224]
; CHECK-LE-NEXT: and v15.16b, v28.16b, v25.16b
; CHECK-LE-NEXT: ldp q30, q29, [x3, #224]
; CHECK-LE-NEXT: bit v7.16b, v8.16b, v31.16b
; CHECK-LE-NEXT: ldr q31, [x8, :lo12:.LCPI3_2]
; CHECK-LE-NEXT: bit v6.16b, v12.16b, v14.16b
; CHECK-LE-NEXT: cmeq v14.4s, v15.4s, #0
; CHECK-LE-NEXT: ldp q17, q19, [x2, #64]
; CHECK-LE-NEXT: and v12.16b, v28.16b, v31.16b
; CHECK-LE-NEXT: bit v4.16b, v29.16b, v27.16b
; CHECK-LE-NEXT: bit v3.16b, v30.16b, v26.16b
; CHECK-LE-NEXT: ldp q18, q20, [x2, #32]
; CHECK-LE-NEXT: adrp x8, .LCPI3_1
; CHECK-LE-NEXT: bit v2.16b, v11.16b, v14.16b
; CHECK-LE-NEXT: ldp q29, q27, [x2]
; CHECK-LE-NEXT: cmeq v12.4s, v12.4s, #0
; CHECK-LE-NEXT: ldp q30, q26, [x3, #64]
; CHECK-LE-NEXT: ldp q14, q11, [x3, #32]
; CHECK-LE-NEXT: ldr q8, [x8, :lo12:.LCPI3_1]
; CHECK-LE-NEXT: ldp q0, q15, [x3]
; CHECK-LE-NEXT: stp q21, q24, [x0, #96]
; CHECK-LE-NEXT: bit v1.16b, v9.16b, v12.16b
; CHECK-LE-NEXT: stp q6, q2, [x0, #192]
; CHECK-LE-NEXT: and v28.16b, v28.16b, v8.16b
; CHECK-LE-NEXT: stp q3, q4, [x0, #224]
; CHECK-LE-NEXT: and v4.16b, v5.16b, v25.16b
; CHECK-LE-NEXT: and v3.16b, v5.16b, v13.16b
; CHECK-LE-NEXT: and v2.16b, v5.16b, v23.16b
; CHECK-LE-NEXT: and v6.16b, v5.16b, v31.16b
; CHECK-LE-NEXT: ldr q25, [sp] // 16-byte Reload
; CHECK-LE-NEXT: cmeq v28.4s, v28.4s, #0
; CHECK-LE-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-LE-NEXT: cmeq v4.4s, v4.4s, #0
; CHECK-LE-NEXT: stp q1, q7, [x0, #160]
; CHECK-LE-NEXT: and v7.16b, v5.16b, v8.16b
; CHECK-LE-NEXT: and v5.16b, v5.16b, v22.16b
; CHECK-LE-NEXT: cmeq v3.4s, v3.4s, #0
; CHECK-LE-NEXT: cmeq v1.4s, v2.4s, #0
; CHECK-LE-NEXT: bit v25.16b, v10.16b, v28.16b
; CHECK-LE-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-LE-NEXT: mov v2.16b, v4.16b
; CHECK-LE-NEXT: cmeq v4.4s, v6.4s, #0
; CHECK-LE-NEXT: cmeq v6.4s, v7.4s, #0
; CHECK-LE-NEXT: cmeq v5.4s, v5.4s, #0
; CHECK-LE-NEXT: bsl v3.16b, v30.16b, v17.16b
; CHECK-LE-NEXT: bsl v1.16b, v11.16b, v20.16b
; CHECK-LE-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-LE-NEXT: bsl v2.16b, v26.16b, v19.16b
; CHECK-LE-NEXT: bsl v4.16b, v14.16b, v18.16b
; CHECK-LE-NEXT: bsl v6.16b, v15.16b, v27.16b
; CHECK-LE-NEXT: bif v0.16b, v29.16b, v5.16b
; CHECK-LE-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-LE-NEXT: stp q16, q25, [x0, #128]
; CHECK-LE-NEXT: stp q4, q1, [x0, #32]
; CHECK-LE-NEXT: stp q0, q6, [x0]
; CHECK-LE-NEXT: stp q3, q2, [x0, #64]
; CHECK-LE-NEXT: add sp, sp, #80
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else64:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: stp d11, d10, [sp, #-32]! // 16-byte Folded Spill
; CHECK-BE-NEXT: fmov d5, x1
; CHECK-BE-NEXT: add x9, x2, #224
; CHECK-BE-NEXT: add x8, x2, #240
; CHECK-BE-NEXT: ld1 { v1.4s }, [x9]
; CHECK-BE-NEXT: add x9, x2, #192
; CHECK-BE-NEXT: adrp x10, .LCPI3_3
; CHECK-BE-NEXT: add x10, x10, :lo12:.LCPI3_3
; CHECK-BE-NEXT: ld1 { v3.4s }, [x9]
; CHECK-BE-NEXT: add x9, x2, #128
; CHECK-BE-NEXT: rev64 v17.4s, v5.4s
; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
; CHECK-BE-NEXT: add x9, x2, #80
; CHECK-BE-NEXT: ld1 { v19.4s }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI3_4
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_4
; CHECK-BE-NEXT: ld1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #208
; CHECK-BE-NEXT: ld1 { v24.4s }, [x10]
; CHECK-BE-NEXT: ld1 { v28.4s }, [x9]
; CHECK-BE-NEXT: add x9, x3, #96
; CHECK-BE-NEXT: ld1 { v0.4s }, [x8]
; CHECK-BE-NEXT: dup v7.4s, v17.s[0]
; CHECK-BE-NEXT: add x8, x2, #176
; CHECK-BE-NEXT: ld1 { v23.4s }, [x9]
; CHECK-BE-NEXT: add x9, x3, #48
; CHECK-BE-NEXT: ld1 { v4.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #160
; CHECK-BE-NEXT: ld1 { v29.4s }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI3_5
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_5
; CHECK-BE-NEXT: ld1 { v5.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #144
; CHECK-BE-NEXT: and v16.16b, v7.16b, v24.16b
; CHECK-BE-NEXT: and v30.16b, v7.16b, v28.16b
; CHECK-BE-NEXT: ld1 { v25.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v6.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #96
; CHECK-BE-NEXT: adrp x9, .LCPI3_6
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_6
; CHECK-BE-NEXT: ld1 { v22.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #48
; CHECK-BE-NEXT: ld1 { v27.4s }, [x9]
; CHECK-BE-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-BE-NEXT: ld1 { v20.4s }, [x8]
; CHECK-BE-NEXT: cmeq v16.4s, v16.4s, #0
; CHECK-BE-NEXT: and v8.16b, v7.16b, v25.16b
; CHECK-BE-NEXT: cmeq v30.4s, v30.4s, #0
; CHECK-BE-NEXT: add x8, x3, #80
; CHECK-BE-NEXT: adrp x9, .LCPI3_7
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_7
; CHECK-BE-NEXT: add x10, x2, #64
; CHECK-BE-NEXT: ld1 { v26.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #64
; CHECK-BE-NEXT: ld1 { v31.4s }, [x9]
; CHECK-BE-NEXT: adrp x9, .LCPI3_0
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_0
; CHECK-BE-NEXT: and v10.16b, v7.16b, v27.16b
; CHECK-BE-NEXT: ld1 { v21.4s }, [x10]
; CHECK-BE-NEXT: ld1 { v9.4s }, [x8]
; CHECK-BE-NEXT: bsl v16.16b, v29.16b, v20.16b
; CHECK-BE-NEXT: ld1 { v20.4s }, [x9]
; CHECK-BE-NEXT: cmeq v8.4s, v8.4s, #0
; CHECK-BE-NEXT: dup v29.4s, v17.s[1]
; CHECK-BE-NEXT: mov v17.16b, v30.16b
; CHECK-BE-NEXT: add x8, x2, #112
; CHECK-BE-NEXT: and v11.16b, v7.16b, v31.16b
; CHECK-BE-NEXT: cmeq v30.4s, v10.4s, #0
; CHECK-BE-NEXT: add x9, x3, #112
; CHECK-BE-NEXT: bit v19.16b, v26.16b, v8.16b
; CHECK-BE-NEXT: and v8.16b, v29.16b, v20.16b
; CHECK-BE-NEXT: ld1 { v10.4s }, [x9]
; CHECK-BE-NEXT: bsl v17.16b, v9.16b, v21.16b
; CHECK-BE-NEXT: ld1 { v9.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #32
; CHECK-BE-NEXT: ld1 { v21.4s }, [x8]
; CHECK-BE-NEXT: add x8, x2, #16
; CHECK-BE-NEXT: cmeq v26.4s, v11.4s, #0
; CHECK-BE-NEXT: bif v23.16b, v22.16b, v30.16b
; CHECK-BE-NEXT: ld1 { v22.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #128
; CHECK-BE-NEXT: and v30.16b, v29.16b, v24.16b
; CHECK-BE-NEXT: ld1 { v11.4s }, [x8]
; CHECK-BE-NEXT: cmeq v8.4s, v8.4s, #0
; CHECK-BE-NEXT: add x9, x3, #176
; CHECK-BE-NEXT: bsl v26.16b, v10.16b, v9.16b
; CHECK-BE-NEXT: add x8, x3, #240
; CHECK-BE-NEXT: ld1 { v9.4s }, [x9]
; CHECK-BE-NEXT: and v28.16b, v29.16b, v28.16b
; CHECK-BE-NEXT: and v31.16b, v29.16b, v31.16b
; CHECK-BE-NEXT: cmeq v30.4s, v30.4s, #0
; CHECK-BE-NEXT: bit v18.16b, v11.16b, v8.16b
; CHECK-BE-NEXT: ld1 { v8.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #224
; CHECK-BE-NEXT: adrp x9, .LCPI3_2
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI3_2
; CHECK-BE-NEXT: ld1 { v10.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #192
; CHECK-BE-NEXT: cmeq v28.4s, v28.4s, #0
; CHECK-BE-NEXT: bit v4.16b, v9.16b, v30.16b
; CHECK-BE-NEXT: ld1 { v30.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #208
; CHECK-BE-NEXT: ld1 { v9.4s }, [x8]
; CHECK-BE-NEXT: and v27.16b, v29.16b, v27.16b
; CHECK-BE-NEXT: cmeq v31.4s, v31.4s, #0
; CHECK-BE-NEXT: adrp x8, .LCPI3_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI3_1
; CHECK-BE-NEXT: and v25.16b, v29.16b, v25.16b
; CHECK-BE-NEXT: bit v3.16b, v30.16b, v28.16b
; CHECK-BE-NEXT: ld1 { v28.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v30.4s }, [x8]
; CHECK-BE-NEXT: cmeq v27.4s, v27.4s, #0
; CHECK-BE-NEXT: bit v2.16b, v8.16b, v31.16b
; CHECK-BE-NEXT: add x8, x3, #160
; CHECK-BE-NEXT: cmeq v25.4s, v25.4s, #0
; CHECK-BE-NEXT: ld1 { v31.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #144
; CHECK-BE-NEXT: and v8.16b, v29.16b, v28.16b
; CHECK-BE-NEXT: and v29.16b, v29.16b, v30.16b
; CHECK-BE-NEXT: add x9, x3, #32
; CHECK-BE-NEXT: bit v1.16b, v10.16b, v27.16b
; CHECK-BE-NEXT: ld1 { v27.4s }, [x8]
; CHECK-BE-NEXT: add x8, x3, #16
; CHECK-BE-NEXT: bit v0.16b, v9.16b, v25.16b
; CHECK-BE-NEXT: ld1 { v25.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #240
; CHECK-BE-NEXT: cmeq v8.4s, v8.4s, #0
; CHECK-BE-NEXT: cmeq v29.4s, v29.4s, #0
; CHECK-BE-NEXT: ld1 { v24.4s }, [x2]
; CHECK-BE-NEXT: ld1 { v10.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v9.4s }, [x3]
; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #224
; CHECK-BE-NEXT: mov v2.16b, v8.16b
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: mov v1.16b, v29.16b
; CHECK-BE-NEXT: add x8, x0, #208
; CHECK-BE-NEXT: st1 { v0.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #192
; CHECK-BE-NEXT: and v0.16b, v7.16b, v28.16b
; CHECK-BE-NEXT: bsl v2.16b, v31.16b, v5.16b
; CHECK-BE-NEXT: bsl v1.16b, v27.16b, v6.16b
; CHECK-BE-NEXT: st1 { v3.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #176
; CHECK-BE-NEXT: and v3.16b, v7.16b, v30.16b
; CHECK-BE-NEXT: st1 { v4.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #160
; CHECK-BE-NEXT: and v4.16b, v7.16b, v20.16b
; CHECK-BE-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #144
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #128
; CHECK-BE-NEXT: cmeq v1.4s, v3.4s, #0
; CHECK-BE-NEXT: st1 { v18.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #112
; CHECK-BE-NEXT: cmeq v2.4s, v4.4s, #0
; CHECK-BE-NEXT: st1 { v26.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #96
; CHECK-BE-NEXT: bsl v0.16b, v10.16b, v21.16b
; CHECK-BE-NEXT: st1 { v23.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #80
; CHECK-BE-NEXT: bsl v1.16b, v25.16b, v22.16b
; CHECK-BE-NEXT: st1 { v19.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #64
; CHECK-BE-NEXT: bsl v2.16b, v9.16b, v24.16b
; CHECK-BE-NEXT: st1 { v17.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #48
; CHECK-BE-NEXT: st1 { v16.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #32
; CHECK-BE-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-BE-NEXT: st1 { v0.4s }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: st1 { v2.4s }, [x0]
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: ldp d11, d10, [sp], #32 // 16-byte Folded Reload
; CHECK-BE-NEXT: ret
; The entry label name "start" appears in the expected "// %start" comment
; above, so it must not be renamed without regenerating the assertions.
start:
%if_true.val = load <64 x i32>, ptr %if_true, align 4
%if_false.val = load <64 x i32>, ptr %if_false, align 4
; Reinterpret the 64 scalar mask bits as 64 one-bit lane predicates.
%0 = bitcast i64 %mask to <64 x i1>
%1 = select <64 x i1> %0, <64 x i32> %if_true.val, <64 x i32> %if_false.val
store <64 x i32> %1, ptr %out, align 4
ret void
}
; Expands the 8 bits of scalar %x into an <8 x i8> vector: %x is bitcast to
; <8 x i1> and each lane is zero-extended, so every result byte is 0 or 1.
define <8 x i8> @broadcast_u8_to_v8i8_zext(i8 %x) {
; CHECK-LE-LABEL: broadcast_u8_to_v8i8_zext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: dup v0.8b, w0
; CHECK-LE-NEXT: adrp x8, .LCPI4_0
; CHECK-LE-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-LE-NEXT: cmtst v0.8b, v0.8b, v1.8b
; CHECK-LE-NEXT: ushr v0.8b, v0.8b, #7
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u8_to_v8i8_zext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: dup v0.8b, w0
; CHECK-BE-NEXT: adrp x8, .LCPI4_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI4_0
; CHECK-BE-NEXT: ld1 { v1.8b }, [x8]
; CHECK-BE-NEXT: cmtst v0.8b, v0.8b, v1.8b
; CHECK-BE-NEXT: ushr v0.8b, v0.8b, #7
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: ret
%v1 = bitcast i8 %x to <8 x i1>
%v8 = zext <8 x i1> %v1 to <8 x i8>
ret <8 x i8> %v8
}
; Expands the 8 bits of scalar %x into an <8 x i8> vector: %x is bitcast to
; <8 x i1> and each lane is sign-extended, so every result byte is 0 or -1
; (all bits set).
define <8 x i8> @broadcast_u8_to_v8i8_sext(i8 %x) {
; CHECK-LE-LABEL: broadcast_u8_to_v8i8_sext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: dup v0.8b, w0
; CHECK-LE-NEXT: adrp x8, .LCPI5_0
; CHECK-LE-NEXT: ldr d1, [x8, :lo12:.LCPI5_0]
; CHECK-LE-NEXT: cmtst v0.8b, v0.8b, v1.8b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u8_to_v8i8_sext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: dup v0.8b, w0
; CHECK-BE-NEXT: adrp x8, .LCPI5_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI5_0
; CHECK-BE-NEXT: ld1 { v1.8b }, [x8]
; CHECK-BE-NEXT: cmtst v0.8b, v0.8b, v1.8b
; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: ret
%v1 = bitcast i8 %x to <8 x i1>
%v8 = sext <8 x i1> %v1 to <8 x i8>
ret <8 x i8> %v8
}
; Expands the 16 bits of scalar %x into a <16 x i8> vector: %x is bitcast to
; <16 x i1> and each lane is zero-extended, so every result byte is 0 or 1.
define <16 x i8> @broadcast_u16_to_v16i8_zext(i16 %x) {
; CHECK-LE-LABEL: broadcast_u16_to_v16i8_zext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: adrp x8, .LCPI6_0
; CHECK-LE-NEXT: fmov s1, w0
; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
; CHECK-LE-NEXT: adrp x8, .LCPI6_1
; CHECK-LE-NEXT: tbl v0.16b, { v1.16b }, v0.16b
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI6_1]
; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v1.16b
; CHECK-LE-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u16_to_v16i8_zext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: adrp x8, .LCPI6_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_0
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI6_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_1
; CHECK-BE-NEXT: rev16 v0.16b, v0.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v1.16b
; CHECK-BE-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
%v1 = bitcast i16 %x to <16 x i1>
%v8 = zext <16 x i1> %v1 to <16 x i8>
ret <16 x i8> %v8
}
; Expands the 16 bits of scalar %x into a <16 x i8> vector: %x is bitcast to
; <16 x i1> and each lane is sign-extended, so every result byte is 0 or -1
; (all bits set).
define <16 x i8> @broadcast_u16_to_v16i8_sext(i16 %x) {
; CHECK-LE-LABEL: broadcast_u16_to_v16i8_sext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: adrp x8, .LCPI7_0
; CHECK-LE-NEXT: fmov s1, w0
; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
; CHECK-LE-NEXT: adrp x8, .LCPI7_1
; CHECK-LE-NEXT: tbl v0.16b, { v1.16b }, v0.16b
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI7_1]
; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v1.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u16_to_v16i8_sext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: adrp x8, .LCPI7_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI7_0
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI7_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI7_1
; CHECK-BE-NEXT: rev16 v0.16b, v0.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v1.16b
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
%v1 = bitcast i16 %x to <16 x i1>
%v8 = sext <16 x i1> %v1 to <16 x i8>
ret <16 x i8> %v8
}
; Expands the 32 bits of scalar %x into a <32 x i8> vector: %x is bitcast to
; <32 x i1> and each lane is zero-extended, so every result byte is 0 or 1.
; The expected assembly returns the result in two 128-bit registers (v0, v1).
define <32 x i8> @broadcast_u32_to_v32i8_zext(i32 %x) {
; CHECK-LE-LABEL: broadcast_u32_to_v32i8_zext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: adrp x8, .LCPI8_0
; CHECK-LE-NEXT: adrp x9, .LCPI8_2
; CHECK-LE-NEXT: fmov s2, w0
; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-LE-NEXT: ldr q1, [x9, :lo12:.LCPI8_2]
; CHECK-LE-NEXT: adrp x8, .LCPI8_1
; CHECK-LE-NEXT: tbl v0.16b, { v2.16b }, v0.16b
; CHECK-LE-NEXT: tbl v1.16b, { v2.16b }, v1.16b
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI8_1]
; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v2.16b
; CHECK-LE-NEXT: cmtst v1.16b, v1.16b, v2.16b
; CHECK-LE-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-LE-NEXT: ushr v1.16b, v1.16b, #7
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u32_to_v32i8_zext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: adrp x8, .LCPI8_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI8_0
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI8_2
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI8_2
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI8_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI8_1
; CHECK-BE-NEXT: rev32 v0.16b, v0.16b
; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v2.16b
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v2.16b
; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: ushr v1.16b, v1.16b, #7
; CHECK-BE-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: rev64 v1.16b, v1.16b
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-BE-NEXT: ret
%v1 = bitcast i32 %x to <32 x i1>
%v8 = zext <32 x i1> %v1 to <32 x i8>
ret <32 x i8> %v8
}
; Expands the 32 bits of scalar %x into a <32 x i8> vector: %x is bitcast to
; <32 x i1> and each lane is sign-extended, so every result byte is 0 or -1
; (all bits set). The expected assembly returns the result in two 128-bit
; registers (v0, v1).
define <32 x i8> @broadcast_u32_to_v32i8_sext(i32 %x) {
; CHECK-LE-LABEL: broadcast_u32_to_v32i8_sext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: adrp x8, .LCPI9_0
; CHECK-LE-NEXT: adrp x9, .LCPI9_2
; CHECK-LE-NEXT: fmov s2, w0
; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
; CHECK-LE-NEXT: ldr q1, [x9, :lo12:.LCPI9_2]
; CHECK-LE-NEXT: adrp x8, .LCPI9_1
; CHECK-LE-NEXT: tbl v0.16b, { v2.16b }, v0.16b
; CHECK-LE-NEXT: tbl v1.16b, { v2.16b }, v1.16b
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
; CHECK-LE-NEXT: cmtst v0.16b, v0.16b, v2.16b
; CHECK-LE-NEXT: cmtst v1.16b, v1.16b, v2.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u32_to_v32i8_sext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: adrp x8, .LCPI9_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI9_0
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI9_2
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI9_2
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI9_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI9_1
; CHECK-BE-NEXT: rev32 v0.16b, v0.16b
; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v2.16b
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v2.16b
; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: rev64 v1.16b, v1.16b
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-BE-NEXT: ret
%v1 = bitcast i32 %x to <32 x i1>
%v8 = sext <32 x i1> %v1 to <32 x i8>
ret <32 x i8> %v8
}
; Expands the 64 bits of scalar %x into a <64 x i8> vector: %x is bitcast to
; <64 x i1> and each lane is zero-extended, so every result byte is 0 or 1.
; The expected assembly returns the result in four 128-bit registers (v0-v3).
define <64 x i8> @broadcast_u64_to_v64i8_zext(i64 %x) {
; CHECK-LE-LABEL: broadcast_u64_to_v64i8_zext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: adrp x8, .LCPI10_0
; CHECK-LE-NEXT: adrp x9, .LCPI10_2
; CHECK-LE-NEXT: fmov d0, x0
; CHECK-LE-NEXT: adrp x10, .LCPI10_3
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
; CHECK-LE-NEXT: adrp x8, .LCPI10_4
; CHECK-LE-NEXT: ldr q2, [x9, :lo12:.LCPI10_2]
; CHECK-LE-NEXT: ldr q3, [x10, :lo12:.LCPI10_3]
; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI10_4]
; CHECK-LE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-LE-NEXT: adrp x8, .LCPI10_1
; CHECK-LE-NEXT: tbl v2.16b, { v0.16b }, v2.16b
; CHECK-LE-NEXT: tbl v3.16b, { v0.16b }, v3.16b
; CHECK-LE-NEXT: tbl v0.16b, { v0.16b }, v4.16b
; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI10_1]
; CHECK-LE-NEXT: cmtst v1.16b, v1.16b, v4.16b
; CHECK-LE-NEXT: cmtst v2.16b, v2.16b, v4.16b
; CHECK-LE-NEXT: cmtst v3.16b, v3.16b, v4.16b
; CHECK-LE-NEXT: cmtst v4.16b, v0.16b, v4.16b
; CHECK-LE-NEXT: ushr v0.16b, v1.16b, #7
; CHECK-LE-NEXT: ushr v1.16b, v2.16b, #7
; CHECK-LE-NEXT: ushr v2.16b, v3.16b, #7
; CHECK-LE-NEXT: ushr v3.16b, v4.16b, #7
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u64_to_v64i8_zext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov d0, x0
; CHECK-BE-NEXT: adrp x8, .LCPI10_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_0
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI10_4
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_4
; CHECK-BE-NEXT: adrp x9, .LCPI10_2
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI10_2
; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: adrp x8, .LCPI10_3
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_3
; CHECK-BE-NEXT: ld1 { v2.16b }, [x9]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI10_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI10_1
; CHECK-BE-NEXT: ld1 { v5.16b }, [x8]
; CHECK-BE-NEXT: tbl v3.16b, { v0.16b }, v3.16b
; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: tbl v2.16b, { v0.16b }, v2.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v4.16b
; CHECK-BE-NEXT: cmtst v3.16b, v3.16b, v5.16b
; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v5.16b
; CHECK-BE-NEXT: cmtst v2.16b, v2.16b, v5.16b
; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v5.16b
; CHECK-BE-NEXT: ushr v3.16b, v3.16b, #7
; CHECK-BE-NEXT: ushr v1.16b, v1.16b, #7
; CHECK-BE-NEXT: ushr v2.16b, v2.16b, #7
; CHECK-BE-NEXT: ushr v0.16b, v0.16b, #7
; CHECK-BE-NEXT: rev64 v3.16b, v3.16b
; CHECK-BE-NEXT: rev64 v2.16b, v2.16b
; CHECK-BE-NEXT: rev64 v5.16b, v1.16b
; CHECK-BE-NEXT: rev64 v4.16b, v0.16b
; CHECK-BE-NEXT: ext v0.16b, v3.16b, v3.16b, #8
; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
; CHECK-BE-NEXT: ext v3.16b, v5.16b, v5.16b, #8
; CHECK-BE-NEXT: ext v1.16b, v4.16b, v4.16b, #8
; CHECK-BE-NEXT: ret
%v1 = bitcast i64 %x to <64 x i1>
%v8 = zext <64 x i1> %v1 to <64 x i8>
ret <64 x i8> %v8
}
; Test: reinterpret the 64 bits of %x as a <64 x i1> mask and sign-extend every
; lane to i8, so each result byte is 0 or -1 (all ones).
; Expected lowering: fmov of the scalar into a vector register, four tbl
; lookups with constant-pool index vectors, then cmtst against one more
; constant-pool vector. The cmtst results are returned directly; unlike the
; zero-extending variant of this test, no ushr is expected afterwards.
; NOTE(review): the constant-pool contents are not visible in this file; the
; cmtst operand presumably holds one bit per lane -- confirm against the
; emitted .LCPI11_* data.
; The big-endian run additionally expects rev64 on the input and a
; rev64 + ext #8 pair on every result register.
define <64 x i8> @broadcast_u64_to_v64i8_sext(i64 %x) {
; CHECK-LE-LABEL: broadcast_u64_to_v64i8_sext:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: adrp x8, .LCPI11_0
; CHECK-LE-NEXT: adrp x9, .LCPI11_2
; CHECK-LE-NEXT: fmov d0, x0
; CHECK-LE-NEXT: adrp x10, .LCPI11_3
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
; CHECK-LE-NEXT: adrp x8, .LCPI11_4
; CHECK-LE-NEXT: ldr q2, [x9, :lo12:.LCPI11_2]
; CHECK-LE-NEXT: ldr q3, [x10, :lo12:.LCPI11_3]
; CHECK-LE-NEXT: ldr q4, [x8, :lo12:.LCPI11_4]
; CHECK-LE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-LE-NEXT: adrp x8, .LCPI11_1
; CHECK-LE-NEXT: tbl v2.16b, { v0.16b }, v2.16b
; CHECK-LE-NEXT: tbl v3.16b, { v0.16b }, v3.16b
; CHECK-LE-NEXT: tbl v4.16b, { v0.16b }, v4.16b
; CHECK-LE-NEXT: ldr q5, [x8, :lo12:.LCPI11_1]
; CHECK-LE-NEXT: cmtst v0.16b, v1.16b, v5.16b
; CHECK-LE-NEXT: cmtst v1.16b, v2.16b, v5.16b
; CHECK-LE-NEXT: cmtst v2.16b, v3.16b, v5.16b
; CHECK-LE-NEXT: cmtst v3.16b, v4.16b, v5.16b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: broadcast_u64_to_v64i8_sext:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov d0, x0
; CHECK-BE-NEXT: adrp x8, .LCPI11_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_0
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI11_4
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_4
; CHECK-BE-NEXT: adrp x9, .LCPI11_2
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI11_2
; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
; CHECK-BE-NEXT: rev64 v0.16b, v0.16b
; CHECK-BE-NEXT: adrp x8, .LCPI11_3
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_3
; CHECK-BE-NEXT: ld1 { v2.16b }, [x9]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI11_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_1
; CHECK-BE-NEXT: ld1 { v5.16b }, [x8]
; CHECK-BE-NEXT: tbl v3.16b, { v0.16b }, v3.16b
; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: tbl v2.16b, { v0.16b }, v2.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v4.16b
; CHECK-BE-NEXT: cmtst v3.16b, v3.16b, v5.16b
; CHECK-BE-NEXT: cmtst v1.16b, v1.16b, v5.16b
; CHECK-BE-NEXT: cmtst v2.16b, v2.16b, v5.16b
; CHECK-BE-NEXT: cmtst v0.16b, v0.16b, v5.16b
; CHECK-BE-NEXT: rev64 v3.16b, v3.16b
; CHECK-BE-NEXT: rev64 v2.16b, v2.16b
; CHECK-BE-NEXT: rev64 v5.16b, v1.16b
; CHECK-BE-NEXT: rev64 v4.16b, v0.16b
; CHECK-BE-NEXT: ext v0.16b, v3.16b, v3.16b, #8
; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
; CHECK-BE-NEXT: ext v3.16b, v5.16b, v5.16b, #8
; CHECK-BE-NEXT: ext v1.16b, v4.16b, v4.16b, #8
; CHECK-BE-NEXT: ret
; IR under test: scalar -> i1 mask vector -> sign-extended byte vector.
%v1 = bitcast i64 %x to <64 x i1>
%v8 = sext <64 x i1> %v1 to <64 x i8>
ret <64 x i8> %v8
}
; Test: per-lane select between two <8 x i8> vectors, controlled by an i8
; scalar that is bitcast to an <8 x i1> mask; the result is stored to %out.
; Expected lowering: the mask byte is broadcast with dup, and-ed with a
; constant-pool vector, compared against zero with cmeq, and the comparison
; result drives a single bsl. Because cmeq #0 is set where the tested bit is
; clear, the value loaded from %if_false (x3) is the first bsl source and the
; value loaded from %if_true (x2) is the second.
; NOTE(review): the constant-pool vector presumably holds one distinct bit per
; lane -- its contents are not visible in this file; confirm against .LCPI12_0.
; The big-endian run expects ld1/st1 in place of ldr/str.
define void @if_then_else8_i8(ptr %out, i8 %mask, ptr %if_true, ptr %if_false) {
; CHECK-LE-LABEL: if_then_else8_i8:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: dup v0.8b, w1
; CHECK-LE-NEXT: adrp x8, .LCPI12_0
; CHECK-LE-NEXT: ldr d2, [x3]
; CHECK-LE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
; CHECK-LE-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-LE-NEXT: ldr d1, [x2]
; CHECK-LE-NEXT: cmeq v0.8b, v0.8b, #0
; CHECK-LE-NEXT: bsl v0.8b, v2.8b, v1.8b
; CHECK-LE-NEXT: str d0, [x0]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else8_i8:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: dup v0.8b, w1
; CHECK-BE-NEXT: adrp x8, .LCPI12_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI12_0
; CHECK-BE-NEXT: ld1 { v1.8b }, [x8]
; CHECK-BE-NEXT: ld1 { v2.8b }, [x3]
; CHECK-BE-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-BE-NEXT: ld1 { v1.8b }, [x2]
; CHECK-BE-NEXT: cmeq v0.8b, v0.8b, #0
; CHECK-BE-NEXT: bsl v0.8b, v2.8b, v1.8b
; CHECK-BE-NEXT: st1 { v0.8b }, [x0]
; CHECK-BE-NEXT: ret
; The entry block label below is echoed in the expected "%bb.0" comment lines,
; so it must not be renamed without regenerating the assertions.
start:
; Both inputs and the output use align 4, which is below the 8-byte vector size.
%t = load <8 x i8>, ptr %if_true, align 4
%f = load <8 x i8>, ptr %if_false, align 4
%m = bitcast i8 %mask to <8 x i1>
%s = select <8 x i1> %m, <8 x i8> %t, <8 x i8> %f
store <8 x i8> %s, ptr %out, align 4
ret void
}
; Test: per-lane select between two <16 x i16> vectors, controlled by an i16
; scalar that is bitcast to a <16 x i1> mask; the result is stored to %out.
; The 32-byte vectors are expected to be handled as two 16-byte halves: the
; mask is broadcast once with dup (.8h), and-ed with two different
; constant-pool vectors (one per half), each half is compared against zero with
; cmeq, and each comparison result drives its own bsl. As cmeq #0 is set where
; the tested bit is clear, the %if_false data (x3) is the first bsl source and
; the %if_true data (x2) the second.
; NOTE(review): the two constant-pool vectors presumably hold the per-lane bits
; for the low and high eight lanes -- their contents are not visible in this
; file; confirm against .LCPI13_0 / .LCPI13_1.
; The big-endian run expects ld1/st1 with explicitly computed +16 addresses in
; place of ldp/stp.
define void @if_then_else16_i16(ptr %out, i16 %mask, ptr %if_true, ptr %if_false) {
; CHECK-LE-LABEL: if_then_else16_i16:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: adrp x8, .LCPI13_1
; CHECK-LE-NEXT: dup v0.8h, w1
; CHECK-LE-NEXT: ldr q1, [x8, :lo12:.LCPI13_1]
; CHECK-LE-NEXT: adrp x8, .LCPI13_0
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
; CHECK-LE-NEXT: ldp q4, q3, [x2]
; CHECK-LE-NEXT: and v1.16b, v0.16b, v1.16b
; CHECK-LE-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-LE-NEXT: ldp q5, q2, [x3]
; CHECK-LE-NEXT: cmeq v1.8h, v1.8h, #0
; CHECK-LE-NEXT: cmeq v0.8h, v0.8h, #0
; CHECK-LE-NEXT: bsl v1.16b, v2.16b, v3.16b
; CHECK-LE-NEXT: bsl v0.16b, v5.16b, v4.16b
; CHECK-LE-NEXT: stp q0, q1, [x0]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else16_i16:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: adrp x8, .LCPI13_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI13_1
; CHECK-BE-NEXT: dup v0.8h, w1
; CHECK-BE-NEXT: ld1 { v1.8h }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI13_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI13_0
; CHECK-BE-NEXT: ld1 { v2.8h }, [x8]
; CHECK-BE-NEXT: add x8, x2, #16
; CHECK-BE-NEXT: add x9, x3, #16
; CHECK-BE-NEXT: ld1 { v3.8h }, [x9]
; CHECK-BE-NEXT: ld1 { v4.8h }, [x2]
; CHECK-BE-NEXT: ld1 { v5.8h }, [x3]
; CHECK-BE-NEXT: and v1.16b, v0.16b, v1.16b
; CHECK-BE-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: ld1 { v2.8h }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: cmeq v1.8h, v1.8h, #0
; CHECK-BE-NEXT: cmeq v0.8h, v0.8h, #0
; CHECK-BE-NEXT: bsl v1.16b, v3.16b, v2.16b
; CHECK-BE-NEXT: bsl v0.16b, v5.16b, v4.16b
; CHECK-BE-NEXT: st1 { v1.8h }, [x8]
; CHECK-BE-NEXT: st1 { v0.8h }, [x0]
; CHECK-BE-NEXT: ret
; The entry block label below is echoed in the expected "%bb.0" comment lines,
; so it must not be renamed without regenerating the assertions.
start:
; Both inputs and the output use align 4, which is below the 32-byte vector size.
%t = load <16 x i16>, ptr %if_true, align 4
%f = load <16 x i16>, ptr %if_false, align 4
%m = bitcast i16 %mask to <16 x i1>
%s = select <16 x i1> %m, <16 x i16> %t, <16 x i16> %f
store <16 x i16> %s, ptr %out, align 4
ret void
}
; Test: per-lane select between two <32 x i8> vectors, controlled by an i32
; scalar that is bitcast to a <32 x i1> mask; the result is stored to %out.
; The 32-byte vectors are expected to be handled as two 16-byte halves: the
; mask is moved into a vector register with fmov, two tbl lookups with
; constant-pool index vectors produce one byte vector per half, both are and-ed
; with a shared constant-pool vector, compared against zero with cmeq, and each
; comparison result drives its own bsl. As cmeq #0 is set where the tested bit
; is clear, the %if_false data (x3) is the first bsl source and the %if_true
; data (x2) the second.
; NOTE(review): the constant-pool contents are not visible in this file; the
; tbl index vectors presumably spread the four mask bytes across lanes and the
; shared vector presumably holds one bit per lane -- confirm against
; .LCPI14_0 / .LCPI14_1 / .LCPI14_2.
; The big-endian run additionally expects rev32 on the mask register and
; ld1/st1 with explicitly computed +16 addresses in place of ldp/stp.
define void @if_then_else32_i8(ptr %out, i32 %mask, ptr %if_true, ptr %if_false) {
; CHECK-LE-LABEL: if_then_else32_i8:
; CHECK-LE: // %bb.0: // %start
; CHECK-LE-NEXT: adrp x8, .LCPI14_2
; CHECK-LE-NEXT: fmov s1, w1
; CHECK-LE-NEXT: ldr q0, [x8, :lo12:.LCPI14_2]
; CHECK-LE-NEXT: adrp x8, .LCPI14_0
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
; CHECK-LE-NEXT: adrp x8, .LCPI14_1
; CHECK-LE-NEXT: tbl v0.16b, { v1.16b }, v0.16b
; CHECK-LE-NEXT: ldp q4, q3, [x2]
; CHECK-LE-NEXT: tbl v1.16b, { v1.16b }, v2.16b
; CHECK-LE-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
; CHECK-LE-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-LE-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-LE-NEXT: ldp q5, q2, [x3]
; CHECK-LE-NEXT: cmeq v0.16b, v0.16b, #0
; CHECK-LE-NEXT: cmeq v1.16b, v1.16b, #0
; CHECK-LE-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-LE-NEXT: bsl v1.16b, v5.16b, v4.16b
; CHECK-LE-NEXT: stp q1, q0, [x0]
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: if_then_else32_i8:
; CHECK-BE: // %bb.0: // %start
; CHECK-BE-NEXT: fmov s0, w1
; CHECK-BE-NEXT: adrp x8, .LCPI14_2
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI14_2
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI14_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI14_0
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
; CHECK-BE-NEXT: adrp x8, .LCPI14_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI14_1
; CHECK-BE-NEXT: rev32 v0.16b, v0.16b
; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
; CHECK-BE-NEXT: add x8, x2, #16
; CHECK-BE-NEXT: add x9, x3, #16
; CHECK-BE-NEXT: ld1 { v4.16b }, [x2]
; CHECK-BE-NEXT: ld1 { v5.16b }, [x3]
; CHECK-BE-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-BE-NEXT: tbl v0.16b, { v0.16b }, v2.16b
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-BE-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-BE-NEXT: ld1 { v3.16b }, [x9]
; CHECK-BE-NEXT: cmeq v1.16b, v1.16b, #0
; CHECK-BE-NEXT: cmeq v0.16b, v0.16b, #0
; CHECK-BE-NEXT: bsl v1.16b, v3.16b, v2.16b
; CHECK-BE-NEXT: bsl v0.16b, v5.16b, v4.16b
; CHECK-BE-NEXT: st1 { v1.16b }, [x8]
; CHECK-BE-NEXT: st1 { v0.16b }, [x0]
; CHECK-BE-NEXT: ret
; The entry block label below is echoed in the expected "%bb.0" comment lines,
; so it must not be renamed without regenerating the assertions.
start:
; Both inputs and the output use align 4, which is below the 32-byte vector size.
%t = load <32 x i8>, ptr %if_true, align 4
%f = load <32 x i8>, ptr %if_false, align 4
%m = bitcast i32 %mask to <32 x i1>
%s = select <32 x i1> %m, <32 x i8> %t, <32 x i8> %f
store <32 x i8> %s, ptr %out, align 4
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}