; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s
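; The SVE2 XAR instruction performs an exclusive OR of two vectors followed by
; a rotate right of each element by an immediate. Check that rotating the
; result of (xor x, y) by a constant amount is selected as a single XAR when
; SVE2 is available, and as EOR + LSL/LSR + ORR on plain SVE. In the _l tests
; a rotate left by (element size - 4) is equivalent to a rotate right by 4,
; which is the amount the XAR immediate encodes.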

define <vscale x 2 x i64> @xar_nxv2i64_l(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_l:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsr z1.d, z0.d, #4
; SVE-NEXT: lsl z0.d, z0.d, #60
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv2i64_l:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
  ret <vscale x 2 x i64> %b
}
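; The _r tests use fshr, whose constant amount is already a rotate right and
; maps directly onto the XAR immediate.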

define <vscale x 2 x i64> @xar_nxv2i64_r(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_r:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsl z1.d, z0.d, #60
; SVE-NEXT: lsr z0.d, z0.d, #4
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv2i64_r:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 4))
  ret <vscale x 2 x i64> %b
}

define <vscale x 4 x i32> @xar_nxv4i32_l(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; SVE-LABEL: xar_nxv4i32_l:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsr z1.s, z0.s, #4
; SVE-NEXT: lsl z0.s, z0.s, #28
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv4i32_l:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.s, z0.s, z1.s, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 4 x i32> %x, %y
  %b = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 28))
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i32> @xar_nxv4i32_r(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; SVE-LABEL: xar_nxv4i32_r:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsl z1.s, z0.s, #28
; SVE-NEXT: lsr z0.s, z0.s, #4
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv4i32_r:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.s, z0.s, z1.s, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 4 x i32> %x, %y
  %b = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 4))
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @xar_nxv8i16_l(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; SVE-LABEL: xar_nxv8i16_l:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsr z1.h, z0.h, #4
; SVE-NEXT: lsl z0.h, z0.h, #12
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv8i16_l:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.h, z0.h, z1.h, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 8 x i16> %x, %y
  %b = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 12))
  ret <vscale x 8 x i16> %b
}

define <vscale x 8 x i16> @xar_nxv8i16_r(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; SVE-LABEL: xar_nxv8i16_r:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsl z1.h, z0.h, #12
; SVE-NEXT: lsr z0.h, z0.h, #4
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv8i16_r:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.h, z0.h, z1.h, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 8 x i16> %x, %y
  %b = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 4))
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @xar_nxv16i8_l(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; SVE-LABEL: xar_nxv16i8_l:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsr z1.b, z0.b, #4
; SVE-NEXT: lsl z0.b, z0.b, #4
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv16i8_l:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.b, z0.b, z1.b, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 16 x i8> %x, %y
  %b = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 4))
  ret <vscale x 16 x i8> %b
}

define <vscale x 16 x i8> @xar_nxv16i8_r(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; SVE-LABEL: xar_nxv16i8_r:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsl z1.b, z0.b, #4
; SVE-NEXT: lsr z0.b, z0.b, #4
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv16i8_r:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.b, z0.b, z1.b, #4
; SVE2-NEXT: ret
  %a = xor <vscale x 16 x i8> %x, %y
  %b = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 4))
  ret <vscale x 16 x i8> %b
}
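; The _neg tests that follow must not be selected as XAR.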

; The rotate amount is not a constant, so the XAR immediate form cannot be used.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
; CHECK-LABEL: xar_nxv2i64_l_neg1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z3.d, z2.d
; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z3.d, z3.d, #0x3f
; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %z)
  ret <vscale x 2 x i64> %b
}

; OR instead of an XOR.
; TODO: We could use the USRA instruction here for SVE2.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_l_neg2:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #4
; CHECK-NEXT: lsl z0.d, z0.d, #60
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %a = or <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
  ret <vscale x 2 x i64> %b
}

; The rotate amount of 64 is equivalent to a rotate by 0, so only the XOR remains.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_l_neg3:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 64))
  ret <vscale x 2 x i64> %b
}

; Uses individual shift operations instead of a funnel-shift intrinsic; just one test of this form.
define <vscale x 2 x i64> @xar_nxv2i64_shifts(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_shifts:
; SVE: // %bb.0:
; SVE-NEXT: eor z0.d, z0.d, z1.d
; SVE-NEXT: lsr z1.d, z0.d, #4
; SVE-NEXT: lsl z0.d, z0.d, #60
; SVE-NEXT: orr z0.d, z0.d, z1.d
; SVE-NEXT: ret
;
; SVE2-LABEL: xar_nxv2i64_shifts:
; SVE2: // %bb.0:
; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT: ret
  %xor = xor <vscale x 2 x i64> %x, %y
  %shl = shl <vscale x 2 x i64> %xor, splat (i64 60)
  %shr = lshr <vscale x 2 x i64> %xor, splat (i64 4)
  %or = or <vscale x 2 x i64> %shl, %shr
  ret <vscale x 2 x i64> %or
}

; Not a rotate, as the shift amounts do not sum to the element size (60 + 3 != 64).
define <vscale x 2 x i64> @xar_nxv2i64_shifts_neg(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_shifts_neg:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: lsl z1.d, z0.d, #60
; CHECK-NEXT: lsr z0.d, z0.d, #3
; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: ret
  %xor = xor <vscale x 2 x i64> %x, %y
  %shl = shl <vscale x 2 x i64> %xor, splat (i64 60)
  %shr = lshr <vscale x 2 x i64> %xor, splat (i64 3)
  %or = or <vscale x 2 x i64> %shl, %shr
  ret <vscale x 2 x i64> %or
}

declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)