| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc %s -o - | FileCheck %s |
| target triple = "aarch64-linux-gnu" |
| |
| ;; An 'AnyOf' reduction (vector.reduce.or) is instcombined to a bitcast to an |
| ;; integer of a bitwidth equal to the number of lanes being reduced, then |
| ;; compared against zero. To select between vectors for NEON, we then need to |
| ;; broadcast the result, but we must be careful when the bitwidth of the scalar |
| ;; result is smaller than the element size of the vectors being selected. We |
| ;; don't want to end up with scalarization. |
| |
| ;; AnyOf select over four i32 lanes: returns %a when no lane of %mask is |
| ;; negative and %b otherwise. The any-of value is an i4, narrower than the |
| ;; i32 elements being selected. The expected output below tests the reduced |
| ;; value once, splats the resulting mask with dup, and selects with one bsl. |
| define <4 x i32> @any_of_select_vf4(<4 x i32> %mask, <4 x i32> %a, <4 x i32> %b) { |
| ; CHECK-LABEL: any_of_select_vf4: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 |
| ; CHECK-NEXT: umaxv s0, v0.4s |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: tst w8, #0x1 |
| ; CHECK-NEXT: csetm w8, ne |
| ; CHECK-NEXT: dup v0.4s, w8 |
| ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b |
| ; CHECK-NEXT: ret |
| ;; Per-lane sign test: a lane is true when its %mask element is negative. |
| %cmp = icmp slt <4 x i32> %mask, zeroinitializer |
| ;; Pack the four lane bits into one 4-bit integer. |
| %cmp.bc = bitcast <4 x i1> %cmp to i4 |
| ;; True when no lane bit is set, i.e. the any-of result is false. |
| %cmp.bc.not = icmp eq i4 %cmp.bc, 0 |
| ;; Scalar i1 condition choosing between two whole vectors. |
| %res = select i1 %cmp.bc.not, <4 x i32> %a, <4 x i32> %b |
| ret <4 x i32> %res |
| } |
| |
| ;; AnyOf select over two i64 lanes: returns %a when no lane of %mask is |
| ;; negative and %b otherwise. The any-of value is an i2, narrower than the |
| ;; i64 elements being selected. The expected output below materialises the |
| ;; mask in a 64-bit register (csetm x8) and splats it with dup v0.2d before |
| ;; a single bsl. |
| define <2 x i64> @any_of_select_vf2(<2 x i64> %mask, <2 x i64> %a, <2 x i64> %b) { |
| ; CHECK-LABEL: any_of_select_vf2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 |
| ; CHECK-NEXT: umaxv s0, v0.4s |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: tst w8, #0x1 |
| ; CHECK-NEXT: csetm x8, ne |
| ; CHECK-NEXT: dup v0.2d, x8 |
| ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b |
| ; CHECK-NEXT: ret |
| ;; Per-lane sign test: a lane is true when its %mask element is negative. |
| %cmp = icmp slt <2 x i64> %mask, zeroinitializer |
| ;; Pack the two lane bits into one 2-bit integer. |
| %cmp.bc = bitcast <2 x i1> %cmp to i2 |
| ;; True when no lane bit is set, i.e. the any-of result is false. |
| %cmp.bc.not = icmp eq i2 %cmp.bc, 0 |
| ;; Scalar i1 condition choosing between two whole vectors. |
| %res = select i1 %cmp.bc.not, <2 x i64> %a, <2 x i64> %b |
| ret <2 x i64> %res |
| } |
| |
| ;; AnyOf select over thirty-two i8 lanes: returns %a when no lane of %mask is |
| ;; negative and %b otherwise. Here the any-of value is an i32, wider than the |
| ;; i8 elements being selected. The expected output below ORs v0 and v1 before |
| ;; one cmlt/umaxv, splats the mask once with dup, copies it, and finishes |
| ;; with two bsl instructions. |
| define <32 x i8> @any_of_select_vf32(<32 x i8> %mask, <32 x i8> %a, <32 x i8> %b) { |
| ; CHECK-LABEL: any_of_select_vf32: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b |
| ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 |
| ; CHECK-NEXT: umaxv b0, v0.16b |
| ; CHECK-NEXT: fmov w8, s0 |
| ; CHECK-NEXT: tst w8, #0x1 |
| ; CHECK-NEXT: csetm w8, ne |
| ; CHECK-NEXT: dup v1.16b, w8 |
| ; CHECK-NEXT: mov v0.16b, v1.16b |
| ; CHECK-NEXT: bsl v1.16b, v5.16b, v3.16b |
| ; CHECK-NEXT: bsl v0.16b, v4.16b, v2.16b |
| ; CHECK-NEXT: ret |
| ;; Per-lane sign test: a lane is true when its %mask element is negative. |
| %cmp = icmp slt <32 x i8> %mask, zeroinitializer |
| ;; Pack the thirty-two lane bits into one 32-bit integer. |
| %cmp.bc = bitcast <32 x i1> %cmp to i32 |
| ;; True when no lane bit is set, i.e. the any-of result is false. |
| %cmp.bc.not = icmp eq i32 %cmp.bc, 0 |
| ;; Scalar i1 condition choosing between two whole vectors. |
| %res = select i1 %cmp.bc.not, <32 x i8> %a, <32 x i8> %b |
| ret <32 x i8> %res |
| } |