| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK |
| |
; (a & 0x7fffffff) | (b & 0x80000000): the two AND masks are exact bitwise
; complements, so the and/and/or chain folds to a single BSL with the mask
; materialized into z2.
define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bsl:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.s, #0x7fffffff
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)  ; a & 0x7fffffff
  %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648) ; b & 0x80000000
  %c = or <vscale x 4 x i32> %1, %2
  ret <vscale x 4 x i32> %c
}
| |
; Non-constant select mask: -x == ~(x - 1) for every x, so %neg_cond and
; %min_cond are bitwise complements and the and/and/or chain still folds to a
; BSL, with the mask computed by subr (0 - pre_cond).
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: bsl_add_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
  %neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond  ; -pre_cond
  %min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)    ; pre_cond - 1 == ~(-pre_cond)
  %left_bits_0 = and <vscale x 4 x i32> %neg_cond, %left
  %right_bits_0 = and <vscale x 4 x i32> %min_cond, %right
  %bsl0000 = or <vscale x 4 x i32> %right_bits_0, %left_bits_0
  ret <vscale x 4 x i32> %bsl0000
}
| |
; We do not expect a BSL instruction here: the AND-mask constants are not
; bitwise complements of each other, so the pattern cannot fold to BSL.
; Negative test: 0x7fffffff and 0x7ffffffe are not bitwise complements, so the
; chain must stay as two ANDs plus an ORR instead of folding to BSL.
define <vscale x 4 x i32> @no_bsl_fold(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: no_bsl_fold:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT: and z1.s, z1.s, #0x7ffffffe
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)  ; a & 0x7fffffff
  %2 = and <vscale x 4 x i32> %b, splat(i32 2147483646)  ; b & 0x7ffffffe
  %c = or <vscale x 4 x i32> %1, %2
  ret <vscale x 4 x i32> %c
}
| |
; Inverted BSL pattern with complementary constant masks (0x7f / 0x80) for i8
; elements: the trailing NOT folds the whole chain into a single NBSL.
define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: nbsl_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.b, #127 // =0x7f
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %1 = and <vscale x 16 x i8> %a, splat(i8 127)   ; a & 0x7f
  %2 = and <vscale x 16 x i8> %b, splat(i8 -128)  ; b & 0x80
  %3 = or <vscale x 16 x i8> %1, %2
  %4 = xor <vscale x 16 x i8> %3, splat(i8 -1)    ; bitwise NOT of the select
  ret <vscale x 16 x i8> %4
}
| |
; Same NBSL fold as nbsl_i8, with i16 elements (masks 0x7fff / 0x8000).
define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: nbsl_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %1 = and <vscale x 8 x i16> %a, splat(i16 32767)   ; a & 0x7fff
  %2 = and <vscale x 8 x i16> %b, splat(i16 -32768)  ; b & 0x8000
  %3 = or <vscale x 8 x i16> %1, %2
  %4 = xor <vscale x 8 x i16> %3, splat(i16 -1)      ; bitwise NOT of the select
  ret <vscale x 8 x i16> %4
}
| |
; Same NBSL fold as nbsl_i8, with i32 elements (masks 0x7fffffff / 0x80000000).
define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: nbsl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.s, #0x7fffffff
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647)  ; a & 0x7fffffff
  %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648) ; b & 0x80000000
  %3 = or <vscale x 4 x i32> %1, %2
  %4 = xor <vscale x 4 x i32> %3, splat(i32 -1)          ; bitwise NOT of the select
  ret <vscale x 4 x i32> %4
}
| |
; Same NBSL fold as nbsl_i8, with i64 elements
; (masks 0x7fffffffffffffff / 0x8000000000000000).
define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: nbsl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807)  ; a & 0x7fff...ffff
  %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808) ; b & 0x8000...0000
  %3 = or <vscale x 2 x i64> %1, %2
  %4 = xor <vscale x 2 x i64> %3, splat(i64 -1)                   ; bitwise NOT of the select
  ret <vscale x 2 x i64> %4
}
| |
| ; Test BSL/NBSL/BSL1N/BSL2N code generation for: |
| ; #define BSL(x,y,z) ( ((x) & (z)) | ( (y) & ~(z))) |
| ; #define NBSL(x,y,z) (~(((x) & (z)) | ( (y) & ~(z)))) |
| ; #define BSL1N(x,y,z) ( (~(x) & (z)) | ( (y) & ~(z))) |
| ; #define BSL2N(x,y,z) ( ((x) & (z)) | (~(y) & ~(z))) |
| |
; BSL(x, y, z) = (x & z) | (y & ~z), i8 elements. The chain selects a single
; BSL (the instruction is element-size agnostic, hence the .d operands).
define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; CHECK-LABEL: codegen_bsl_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 16 x i8> %2, %0                 ; x & z
  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)      ; ~z
  %6 = and <vscale x 16 x i8> %1, %5                 ; y & ~z
  %7 = or <vscale x 16 x i8> %4, %6
  ret <vscale x 16 x i8> %7
}
| |
; NBSL(x, y, z) = ~((x & z) | (y & ~z)), i8 elements: BSL plus the final NOT
; selects a single NBSL.
define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; CHECK-LABEL: codegen_nbsl_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 16 x i8> %2, %0                 ; x & z
  %5 = xor <vscale x 16 x i8> %2, splat (i8 -1)      ; ~z
  %6 = and <vscale x 16 x i8> %1, %5                 ; y & ~z
  %7 = or <vscale x 16 x i8> %4, %6
  %8 = xor <vscale x 16 x i8> %7, splat (i8 -1)      ; final NOT
  ret <vscale x 16 x i8> %8
}
| |
; BSL1N(x, y, z) = (~x & z) | (y & ~z), i8 elements: first operand inverted,
; selects a single BSL1N.
define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; CHECK-LABEL: codegen_bsl1n_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = xor <vscale x 16 x i8> %0, splat (i8 -1)      ; ~x
  %5 = and <vscale x 16 x i8> %2, %4                 ; ~x & z
  %6 = xor <vscale x 16 x i8> %2, splat (i8 -1)      ; ~z
  %7 = and <vscale x 16 x i8> %1, %6                 ; y & ~z
  %8 = or <vscale x 16 x i8> %5, %7
  ret <vscale x 16 x i8> %8
}
| |
; BSL2N(x, y, z) = (x & z) | (~y & ~z), i8 elements; written here via De
; Morgan as (x & z) | ~(y | z). Selects a single BSL2N.
define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
; CHECK-LABEL: codegen_bsl2n_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 16 x i8> %2, %0                 ; x & z
  %5 = or <vscale x 16 x i8> %2, %1                  ; z | y
  %6 = xor <vscale x 16 x i8> %5, splat (i8 -1)      ; ~(z | y) == ~y & ~z
  %7 = or <vscale x 16 x i8> %4, %6
  ret <vscale x 16 x i8> %7
}
| |
; BSL(x, y, z) = (x & z) | (y & ~z), i16 elements.
define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; CHECK-LABEL: codegen_bsl_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 8 x i16> %2, %0                 ; x & z
  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)     ; ~z
  %6 = and <vscale x 8 x i16> %1, %5                 ; y & ~z
  %7 = or <vscale x 8 x i16> %4, %6
  ret <vscale x 8 x i16> %7
}
| |
; NBSL(x, y, z) = ~((x & z) | (y & ~z)), i16 elements.
define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; CHECK-LABEL: codegen_nbsl_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 8 x i16> %2, %0                 ; x & z
  %5 = xor <vscale x 8 x i16> %2, splat (i16 -1)     ; ~z
  %6 = and <vscale x 8 x i16> %1, %5                 ; y & ~z
  %7 = or <vscale x 8 x i16> %4, %6
  %8 = xor <vscale x 8 x i16> %7, splat (i16 -1)     ; final NOT
  ret <vscale x 8 x i16> %8
}
| |
; BSL1N(x, y, z) = (~x & z) | (y & ~z), i16 elements.
define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; CHECK-LABEL: codegen_bsl1n_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = xor <vscale x 8 x i16> %0, splat (i16 -1)     ; ~x
  %5 = and <vscale x 8 x i16> %2, %4                 ; ~x & z
  %6 = xor <vscale x 8 x i16> %2, splat (i16 -1)     ; ~z
  %7 = and <vscale x 8 x i16> %1, %6                 ; y & ~z
  %8 = or <vscale x 8 x i16> %5, %7
  ret <vscale x 8 x i16> %8
}
| |
; BSL2N(x, y, z) = (x & z) | (~y & ~z), i16 elements (via ~(y | z)).
define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
; CHECK-LABEL: codegen_bsl2n_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 8 x i16> %2, %0                 ; x & z
  %5 = or <vscale x 8 x i16> %2, %1                  ; z | y
  %6 = xor <vscale x 8 x i16> %5, splat (i16 -1)     ; ~(z | y) == ~y & ~z
  %7 = or <vscale x 8 x i16> %4, %6
  ret <vscale x 8 x i16> %7
}
| |
; BSL(x, y, z) = (x & z) | (y & ~z), i32 elements.
define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; CHECK-LABEL: codegen_bsl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 4 x i32> %2, %0                 ; x & z
  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)     ; ~z
  %6 = and <vscale x 4 x i32> %1, %5                 ; y & ~z
  %7 = or <vscale x 4 x i32> %4, %6
  ret <vscale x 4 x i32> %7
}
| |
; NBSL(x, y, z) = ~((x & z) | (y & ~z)), i32 elements.
define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; CHECK-LABEL: codegen_nbsl_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 4 x i32> %2, %0                 ; x & z
  %5 = xor <vscale x 4 x i32> %2, splat (i32 -1)     ; ~z
  %6 = and <vscale x 4 x i32> %1, %5                 ; y & ~z
  %7 = or <vscale x 4 x i32> %4, %6
  %8 = xor <vscale x 4 x i32> %7, splat (i32 -1)     ; final NOT
  ret <vscale x 4 x i32> %8
}
| |
; BSL1N(x, y, z) = (~x & z) | (y & ~z), i32 elements.
define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; CHECK-LABEL: codegen_bsl1n_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = xor <vscale x 4 x i32> %0, splat (i32 -1)     ; ~x
  %5 = and <vscale x 4 x i32> %2, %4                 ; ~x & z
  %6 = xor <vscale x 4 x i32> %2, splat (i32 -1)     ; ~z
  %7 = and <vscale x 4 x i32> %1, %6                 ; y & ~z
  %8 = or <vscale x 4 x i32> %5, %7
  ret <vscale x 4 x i32> %8
}
| |
; BSL2N(x, y, z) = (x & z) | (~y & ~z), i32 elements (via ~(y | z)).
define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
; CHECK-LABEL: codegen_bsl2n_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 4 x i32> %2, %0                 ; x & z
  %5 = or <vscale x 4 x i32> %2, %1                  ; z | y
  %6 = xor <vscale x 4 x i32> %5, splat (i32 -1)     ; ~(z | y) == ~y & ~z
  %7 = or <vscale x 4 x i32> %4, %6
  ret <vscale x 4 x i32> %7
}
| |
; BSL(x, y, z) = (x & z) | (y & ~z), i64 elements.
define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; CHECK-LABEL: codegen_bsl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 2 x i64> %2, %0                 ; x & z
  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)     ; ~z
  %6 = and <vscale x 2 x i64> %1, %5                 ; y & ~z
  %7 = or <vscale x 2 x i64> %4, %6
  ret <vscale x 2 x i64> %7
}
| |
; NBSL(x, y, z) = ~((x & z) | (y & ~z)), i64 elements.
define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; CHECK-LABEL: codegen_nbsl_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 2 x i64> %2, %0                 ; x & z
  %5 = xor <vscale x 2 x i64> %2, splat (i64 -1)     ; ~z
  %6 = and <vscale x 2 x i64> %1, %5                 ; y & ~z
  %7 = or <vscale x 2 x i64> %4, %6
  %8 = xor <vscale x 2 x i64> %7, splat (i64 -1)     ; final NOT
  ret <vscale x 2 x i64> %8
}
| |
; BSL1N(x, y, z) = (~x & z) | (y & ~z), i64 elements.
define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; CHECK-LABEL: codegen_bsl1n_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = xor <vscale x 2 x i64> %0, splat (i64 -1)     ; ~x
  %5 = and <vscale x 2 x i64> %2, %4                 ; ~x & z
  %6 = xor <vscale x 2 x i64> %2, splat (i64 -1)     ; ~z
  %7 = and <vscale x 2 x i64> %1, %6                 ; y & ~z
  %8 = or <vscale x 2 x i64> %5, %7
  ret <vscale x 2 x i64> %8
}
| |
; BSL2N(x, y, z) = (x & z) | (~y & ~z), i64 elements (via ~(y | z)).
define <vscale x 2 x i64> @codegen_bsl2n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
; CHECK-LABEL: codegen_bsl2n_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %4 = and <vscale x 2 x i64> %2, %0                 ; x & z
  %5 = or <vscale x 2 x i64> %2, %1                  ; z | y
  %6 = xor <vscale x 2 x i64> %5, splat (i64 -1)     ; ~(z | y) == ~y & ~z
  %7 = or <vscale x 2 x i64> %4, %6
  ret <vscale x 2 x i64> %7
}
| |
; ((A ^ B) & C) ^ B -> (A & C) | (B & ~C) when BIC instructions are available.
; ((a ^ b) & c) ^ b is the xor-form of bit-select: it equals
; (a & c) | (b & ~c), so it should still fold to a single BSL.
define <vscale x 4 x i32> @bsl_combine_when_bic_available(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl_combine_when_bic_available:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
entry:
  %t1 = xor <vscale x 4 x i32> %a, %b
  %t2 = and <vscale x 4 x i32> %t1, %c
  %t3 = xor <vscale x 4 x i32> %t2, %b
  ret <vscale x 4 x i32> %t3
}
| |
| ; NOT (a) = NBSL (a, a, a). |
| ; We don't have a pattern for this right now because the tied register |
| ; constraint can lead to worse code gen. |
; Expect a materialized all-ones vector plus EOR, not NBSL (see the comment
; above about the tied-register constraint).
define <vscale x 2 x i64> @not(<vscale x 2 x i64> %0) #0 {
; CHECK-LABEL: not:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %2 = xor <vscale x 2 x i64> %0, splat (i64 -1)     ; ~a
  ret <vscale x 2 x i64> %2
}
| |
| ; NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a). |
; ~(a & b) selects NBSL with the second operand duplicated as the mask:
; NBSL(a, b, b) = ~((a & b) | (b & ~b)) = ~(a & b).
define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: nand:
; CHECK: // %bb.0:
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z1.d
; CHECK-NEXT: ret
  %3 = and <vscale x 2 x i64> %1, %0
  %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)     ; ~(b & a)
  ret <vscale x 2 x i64> %4
}
| |
| ; NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b). |
; ~(a | b) selects NBSL with the first operand duplicated as the mask:
; NBSL(a, b, a) = ~((a & a) | (b & ~a)) = ~(a | b).
define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: nor:
; CHECK: // %bb.0:
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
; CHECK-NEXT: ret
  %3 = or <vscale x 2 x i64> %1, %0
  %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)     ; ~(b | a)
  ret <vscale x 2 x i64> %4
}
| |
| ; EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a). |
; ~(a ^ b) (XNOR) selects BSL2N:
; BSL2N(a, a, b) = (a & b) | (~a & ~b) = ~(a ^ b).
define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: eon:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl2n z0.d, z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %3 = xor <vscale x 2 x i64> %0, %1
  %4 = xor <vscale x 2 x i64> %3, splat (i64 -1)     ; ~(a ^ b)
  ret <vscale x 2 x i64> %4
}
| |
| ; ORN (a, b) = BSL2N (a, b, a). |
; a | ~b selects BSL2N:
; BSL2N(a, b, a) = (a & a) | (~b & ~a) = a | ~b (by absorption).
define <vscale x 2 x i64> @orn(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
; CHECK-LABEL: orn:
; CHECK: // %bb.0:
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z0.d
; CHECK-NEXT: ret
  %3 = xor <vscale x 2 x i64> %1, splat (i64 -1)     ; ~b
  %4 = or <vscale x 2 x i64> %0, %3                  ; a | ~b
  ret <vscale x 2 x i64> %4
}