; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc %s -o - | FileCheck %s
target triple = "aarch64-linux-gnu"
;; An 'AnyOf' reduction (vector.reduce.or) is instcombined to a bitcast to an
;; integer of a bitwidth equal to the number of lanes being reduced, then
;; compared against zero. To select between vectors for NEON, we then need to
;; broadcast the result, but we must be careful when the bitwidth of the scalar
;; result is smaller than the element size of the vectors being selected. We
;; don't want to end up with scalarization.
;; VF=4: the four i1 compare lanes are bitcast to an i4, which is narrower than
;; the i32 elements of the vectors being selected. The expected output reduces
;; the compare mask with umaxv, materialises the scalar condition as an
;; all-ones/all-zeros word with csetm, splats it with dup, and chooses between
;; %a and %b with a single bsl.
define <4 x i32> @any_of_select_vf4(<4 x i32> %mask, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: any_of_select_vf4:
; CHECK: // %bb.0:
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v0.4s, w8
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ret
;; Lane i of %cmp is true when lane i of %mask is negative.
%cmp = icmp slt <4 x i32> %mask, zeroinitializer
;; Pack the four i1 lanes into a single i4 scalar.
%cmp.bc = bitcast <4 x i1> %cmp to i4
;; True only when no lane of %mask was negative.
%cmp.bc.not = icmp eq i4 %cmp.bc, 0
;; Whole-vector select on a scalar i1: %a if no lane was negative, else %b.
%res = select i1 %cmp.bc.not, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %res
}
;; VF=2: the two i1 compare lanes are bitcast to an i2, far narrower than the
;; i64 elements being selected. The expected output reduces the 2d compare
;; result through its 4s view with umaxv, then builds a 64-bit
;; all-ones/all-zeros value (csetm x8) and splats it across both lanes
;; (dup v0.2d) before a single bsl.
define <2 x i64> @any_of_select_vf2(<2 x i64> %mask, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: any_of_select_vf2:
; CHECK: // %bb.0:
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: dup v0.2d, x8
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT: ret
;; Lane i of %cmp is true when lane i of %mask is negative.
%cmp = icmp slt <2 x i64> %mask, zeroinitializer
;; Pack the two i1 lanes into a single i2 scalar.
%cmp.bc = bitcast <2 x i1> %cmp to i2
;; True only when no lane of %mask was negative.
%cmp.bc.not = icmp eq i2 %cmp.bc, 0
;; Whole-vector select on a scalar i1: %a if no lane was negative, else %b.
%res = select i1 %cmp.bc.not, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %res
}
;; VF=32: the opposite size relationship to the other cases in this file. The
;; 32 i1 compare lanes are bitcast to an i32, which is wider than the i8
;; elements being selected. The <32 x i8> values are handled as two 16-byte
;; halves in the expected output: the two mask halves are OR-ed together
;; before the sign test (a lane of the OR is negative iff that lane is
;; negative in at least one half), the splatted condition is copied into a
;; second register, and one bsl is issued per half.
define <32 x i8> @any_of_select_vf32(<32 x i8> %mask, <32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: any_of_select_vf32:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: tst w8, #0x1
; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: dup v1.16b, w8
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: bsl v1.16b, v5.16b, v3.16b
; CHECK-NEXT: bsl v0.16b, v4.16b, v2.16b
; CHECK-NEXT: ret
;; Lane i of %cmp is true when lane i of %mask is negative.
%cmp = icmp slt <32 x i8> %mask, zeroinitializer
;; Pack the 32 i1 lanes into a single i32 scalar.
%cmp.bc = bitcast <32 x i1> %cmp to i32
;; True only when no lane of %mask was negative.
%cmp.bc.not = icmp eq i32 %cmp.bc, 0
;; Whole-vector select on a scalar i1: %a if no lane was negative, else %b.
%res = select i1 %cmp.bc.not, <32 x i8> %a, <32 x i8> %b
ret <32 x i8> %res
}