| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve2,+lut | FileCheck %s |
| ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve,+sme2,+lut | FileCheck %s |
| ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sme2,+lut --force-streaming | FileCheck %s |
| |
| ; LUTI2 lane lookup on i8 elements: the intrinsic call with immediate 0 is |
| ; expected to become a single luti2 on .b registers indexed as z1[0]. |
| define <vscale x 16 x i8> @test_luti2_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti2_lane_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 z0.b, { z0.b }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti2.lane.nxv16i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 16 x i8> %lookup |
| } |
| |
| ; LUTI2 lane lookup on i16 elements: the table is nxv8i16 while the indices |
| ; stay nxv16i8; the expected instruction uses .h registers and z1[0]. |
| define <vscale x 8 x i16> @test_luti2_lane_i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti2_lane_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 z0.h, { z0.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti2.lane.nxv8i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x i16> %lookup |
| } |
| |
| ; LUTI2 lane lookup on half elements: the nxv8f16 table is expected to be |
| ; selected to the .h form of luti2 with the z1[0] index operand. |
| define <vscale x 8 x half> @test_luti2_lane_f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti2_lane_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 z0.h, { z0.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti2.lane.nxv8f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x half> %lookup |
| } |
| |
| ; LUTI2 lane lookup on bfloat elements: the nxv8bf16 table is expected to be |
| ; selected to the .h form of luti2 with the z1[0] index operand. |
| define <vscale x 8 x bfloat> @test_luti2_lane_bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti2_lane_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 z0.h, { z0.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti2.lane.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x bfloat> %lookup |
| } |
| |
| ; LUTI4 lane lookup on i8 elements: the intrinsic call with immediate 0 is |
| ; expected to become a single luti4 on .b registers indexed as z1[0]. |
| define <vscale x 16 x i8> @test_luti4_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti4 z0.b, { z0.b }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 16 x i8> @llvm.aarch64.sve.luti4.lane.nxv16i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 16 x i8> %lookup |
| } |
| |
| ; LUTI4 lane lookup on i16 elements with a single table vector: the expected |
| ; instruction uses a one-register { z0.h } list and the z1[0] index operand. |
| define <vscale x 8 x i16> @test_luti4_lane_i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti4 z0.h, { z0.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.nxv8i16(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x i16> %lookup |
| } |
| |
| ; LUTI4 lane lookup on half elements with a single table vector: expected to |
| ; select the .h form of luti4 with a one-register list and z1[0]. |
| define <vscale x 8 x half> @test_luti4_lane_f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti4 z0.h, { z0.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.nxv8f16(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x half> %lookup |
| } |
| |
| ; LUTI4 lane lookup on bfloat elements with a single table vector: expected to |
| ; select the .h form of luti4 with a one-register list and z1[0]. |
| define <vscale x 8 x bfloat> @test_luti4_lane_bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti4 z0.h, { z0.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x bfloat> %lookup |
| } |
| |
| ; Two-table (x2) LUTI4 lane lookup on i16 elements. %table is passed as both |
| ; table operands, so the expected code copies z0 into z2 and z3 before issuing |
| ; luti4 with the { z2.h, z3.h } register list and the z1[0] index operand. |
| define <vscale x 8 x i16> @test_luti4_lane_i16_x2(<vscale x 8 x i16> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_i16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z2.d, z0.d |
| ; CHECK-NEXT: mov z3.d, z2.d |
| ; CHECK-NEXT: luti4 z0.h, { z2.h, z3.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> %table, <vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x i16> %lookup |
| } |
| |
| ; Two-table (x2) LUTI4 lane lookup on half elements. %table is passed as both |
| ; table operands, so the expected code copies z0 into z2 and z3 before issuing |
| ; luti4 with the { z2.h, z3.h } register list and the z1[0] index operand. |
| define <vscale x 8 x half> @test_luti4_lane_f16_x2(<vscale x 8 x half> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_f16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z2.d, z0.d |
| ; CHECK-NEXT: mov z3.d, z2.d |
| ; CHECK-NEXT: luti4 z0.h, { z2.h, z3.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> %table, <vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x half> %lookup |
| } |
| |
| ; Two-table (x2) LUTI4 lane lookup on bfloat elements. %table is passed as both |
| ; table operands, so the expected code copies z0 into z2 and z3 before issuing |
| ; luti4 with the { z2.h, z3.h } register list and the z1[0] index operand. |
| define <vscale x 8 x bfloat> @test_luti4_lane_bf16_x2(<vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices) { |
| ; CHECK-LABEL: test_luti4_lane_bf16_x2: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: mov z2.d, z0.d |
| ; CHECK-NEXT: mov z3.d, z2.d |
| ; CHECK-NEXT: luti4 z0.h, { z2.h, z3.h }, z1[0] |
| ; CHECK-NEXT: ret |
| %lookup = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0) |
| ret <vscale x 8 x bfloat> %lookup |
| } |