| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon,+lut,+bf16 | FileCheck %s |
| |
| ; vluti2.lane, i8 elements: both %vn and %vm arrive as 64-bit vectors, so each |
| ; D register is widened to its Q register before the single LUTI2 (lane 0). |
| define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_lane_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> %vn, <8 x i8> %vm, i32 0) |
| ret <16 x i8> %lut |
| } |
| |
| ; vluti2.laneq, i8 elements: %vn is 64-bit (widened to q0), %vm is already a |
| ; 128-bit vector, so only one register widening precedes the LUTI2 (lane 0). |
| define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_laneq_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> %vn, <16 x i8> %vm, i32 0) |
| ret <16 x i8> %lut |
| } |
| |
| ; vluti2.lane with a 128-bit %vn (the "q" form), i8 elements: only the 64-bit |
| ; %vm needs widening to q1 before the LUTI2 (lane 0). |
| define <16 x i8> @test_luti2q_lane_i8(<16 x i8> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2q_lane_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0) |
| ret <16 x i8> %lut |
| } |
| |
| ; vluti2.laneq with 128-bit %vn and %vm, i8 elements: no register widening is |
| ; expected, just the LUTI2 (lane 0) and the return. |
| define <16 x i8> @test_luti2q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2q_laneq_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 v0.16b, { v0.16b }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> %vn, <16 x i8> %vm, i32 0) |
| ret <16 x i8> %lut |
| } |
| |
| ; vluti2.lane, i16 elements: 64-bit %vn and 64-bit %vm are both widened to Q |
| ; registers, then a single LUTI2 on .8h (lane 0) is expected. |
| define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_lane_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> %vn, <8 x i8> %vm, i32 0) |
| ret <8 x i16> %lut |
| } |
| |
| ; vluti2.laneq, i16 elements: 64-bit %vn is widened to q0; %vm is already |
| ; 128-bit. A single LUTI2 on .8h (lane 0) is expected. |
| define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_laneq_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> %vn, <16 x i8> %vm, i32 0) |
| ret <8 x i16> %lut |
| } |
| |
| ; vluti2.lane with a 128-bit %vn (the "q" form), i16 elements. %vn is declared |
| ; as <8 x i16> so that it matches the .v8i16.v8i16 intrinsic suffix and this |
| ; test covers a different operand shape than test_luti2_lane_i16. Only the |
| ; 64-bit %vm needs widening to q1 before the LUTI2 on .8h (lane 0), as in the |
| ; i8 and f16 "q" lane tests. |
| define <8 x i16> @test_luti2q_lane_i16(<8 x i16> %vn, <8 x i8> %vm){ |
| ; CHECK-LABEL: test_luti2q_lane_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> %vn, <8 x i8> %vm, i32 0) |
| ret <8 x i16> %res |
| } |
| |
| ; vluti2.laneq with 128-bit %vn and %vm, i16 elements: no register widening, |
| ; just the LUTI2 on .8h (lane 0) and the return. |
| define <8 x i16> @test_luti2q_laneq_i16(<8 x i16> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2q_laneq_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> %vn, <16 x i8> %vm, i32 0) |
| ret <8 x i16> %lut |
| } |
| |
| ; vluti2.lane, half elements: 64-bit %vn and 64-bit %vm are both widened to Q |
| ; registers, then a single LUTI2 on .8h (lane 0) is expected. |
| define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_lane_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> %vn, <8 x i8> %vm, i32 0) |
| ret <8 x half> %lut |
| } |
| |
| ; vluti2.laneq, half elements: 64-bit %vn is widened to q0; %vm is already |
| ; 128-bit. A single LUTI2 on .8h (lane 0) is expected. |
| ; The intrinsic suffix is .v8f16.v4f16 so that the mangled name agrees with |
| ; the <4 x half> operand actually passed. |
| define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm){ |
| ; CHECK-LABEL: test_luti2_laneq_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> %vn, <16 x i8> %vm, i32 0) |
| ret <8 x half> %res |
| } |
| |
| ; vluti2.lane with a 128-bit %vn (the "q" form), half elements: only the |
| ; 64-bit %vm is widened to q1 before the LUTI2 on .8h (lane 0). |
| define <8 x half> @test_luti2q_lane_f16(<8 x half> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2q_lane_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> %vn, <8 x i8> %vm, i32 0) |
| ret <8 x half> %lut |
| } |
| |
| ; vluti2.laneq with 128-bit %vn and %vm, half elements: no register widening, |
| ; just the LUTI2 on .8h (lane 0) and the return. |
| define <8 x half> @test_luti2q_laneq_f16(<8 x half> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2q_laneq_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> %vn, <16 x i8> %vm, i32 0) |
| ret <8 x half> %lut |
| } |
| |
| ; vluti2.lane, bfloat elements: 64-bit %vn and 64-bit %vm are both widened to |
| ; Q registers, then a single LUTI2 on .8h (lane 0) is expected. |
| define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_lane_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0) |
| ret <8 x bfloat> %lut |
| } |
| |
| ; vluti2.laneq, bfloat elements: 64-bit %vn is widened to q0; %vm is already |
| ; 128-bit. A single LUTI2 on .8h (lane 0) is expected. |
| define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2_laneq_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> %vn, <16 x i8> %vm, i32 0) |
| ret <8 x bfloat> %lut |
| } |
| |
| ; vluti2.lane with a 128-bit %vn (the "q" form), bfloat elements. %vn is |
| ; declared as <8 x bfloat> so that it matches the .v8bf16.v8bf16 intrinsic |
| ; suffix and this test covers a different operand shape than |
| ; test_luti2_lane_bf16. Only the 64-bit %vm needs widening to q1 before the |
| ; LUTI2 on .8h (lane 0), as in the i8 and f16 "q" lane tests. |
| define <8 x bfloat> @test_luti2q_lane_bf16(<8 x bfloat> %vn, <8 x i8> %vm){ |
| ; CHECK-LABEL: test_luti2q_lane_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> %vn, <8 x i8> %vm, i32 0) |
| ret <8 x bfloat> %res |
| } |
| |
| ; vluti2.laneq with 128-bit %vn and %vm, bfloat elements: no register |
| ; widening, just the LUTI2 on .8h (lane 0) and the return. |
| define <8 x bfloat> @test_luti2q_laneq_bf16(<8 x bfloat> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti2q_laneq_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti2 v0.8h, { v0.8h }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> %vn, <16 x i8> %vm, i32 0) |
| ret <8 x bfloat> %lut |
| } |
| |
| ; vluti4q.lane, i8 elements: %vn is 128-bit, the 64-bit %vm is widened to q1, |
| ; then a single LUTI4 on .16b (lane 0) is expected. |
| define <16 x i8> @test_luti4q_lane_i8(<16 x i8> %vn, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_lane_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 |
| ; CHECK-NEXT: luti4 v0.16b, { v0.16b }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0) |
| ret <16 x i8> %lut |
| } |
| |
| ; vluti4q.laneq, i8 elements: both operands are 128-bit, so only the LUTI4 on |
| ; .16b (lane 0) and the return are expected. |
| define <16 x i8> @test_luti4q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_laneq_i8: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: luti4 v0.16b, { v0.16b }, v1[0] |
| ; CHECK-NEXT: ret |
| %lut = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> %vn, <16 x i8> %vm, i32 0) |
| ret <16 x i8> %lut |
| } |
| |
| ; vluti4q.lane.x2, i16 elements: %vn1/%vn2 are used as the q0_q1 register |
| ; pair, the 64-bit %vm is widened to q2, and the LUTI4 uses lane 1. |
| define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_lane_x2_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm, i32 1) |
| ret <8 x i16> %lut |
| } |
| |
| ; vluti4q.laneq.x2, i16 elements: %vn1/%vn2 are used as the q0_q1 register |
| ; pair, %vm is already 128-bit, and the LUTI4 uses lane 1. |
| define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_laneq_x2_i16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm, i32 1) |
| ret <8 x i16> %lut |
| } |
| |
| ; vluti4q.lane.x2, half elements: %vn1/%vn2 are used as the q0_q1 register |
| ; pair, the 64-bit %vm is widened to q2, and the LUTI4 uses lane 1. |
| define <8 x half> @test_luti4q_lane_x2_f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_lane_x2_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm, i32 1) |
| ret <8 x half> %lut |
| } |
| |
| |
| ; vluti4q.laneq.x2, half elements: %vn1/%vn2 are used as the q0_q1 register |
| ; pair, %vm is already 128-bit, and the LUTI4 uses lane 1. |
| define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_laneq_x2_f16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm, i32 1) |
| ret <8 x half> %lut |
| } |
| |
| ; vluti4q.laneq.x2, bfloat elements: %vn1/%vn2 are used as the q0_q1 register |
| ; pair, %vm is already 128-bit, and the LUTI4 uses lane 1. |
| define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_laneq_x2_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm, i32 1) |
| ret <8 x bfloat> %lut |
| } |
| |
| ; vluti4q.lane.x2, bfloat elements: %vn1/%vn2 are used as the q0_q1 register |
| ; pair, the 64-bit %vm is widened to q2, and the LUTI4 uses lane 1. |
| define <8 x bfloat> @test_luti4q_lane_x2_bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm) { |
| ; CHECK-LABEL: test_luti4q_lane_x2_bf16: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 |
| ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 |
| ; CHECK-NEXT: luti4 v0.8h, { v0.8h, v1.8h }, v2[1] |
| ; CHECK-NEXT: ret |
| %lut = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm, i32 1) |
| ret <8 x bfloat> %lut |
| } |