| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s |
| |
| ; Register-aligned insert of a nxv4i32 subvector at element 0 of a nxv8i32 |
| ; vector: lowers to a single whole-register move (vmv2r) into the first |
| ; registers (v8) of the destination register group. |
| define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv8i32_nxv4i32_0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv2r.v v8, v12 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Same as above but at element 4 (the upper half of the group): the vmv2r |
| ; targets v10 instead of v8. |
| define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv8i32_nxv4i32_4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv2r.v v10, v12 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Single-register (nxv2i32) subvector inserted at element 0: one vmv1r into |
| ; v8, the first register of the destination group. |
| define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv8i32_nxv2i32_0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v8, v12 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Element 2 starts the second register of the group, so the vmv1r targets v9. |
| define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv8i32_nxv2i32_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v9, v12 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Element 4 starts the third register of the group, so the vmv1r targets v10. |
| define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv8i32_nxv2i32_4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v10, v12 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Element 6 starts the last register of the group, so the vmv1r targets v11. |
| define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv8i32_nxv2i32_6: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v11, v12 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Register-aligned insert of a nxv8i32 subvector at element 0 of a nxv16i32 |
| ; vector: one four-register move (vmv4r) into v8, the start of the group. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv8i32_0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv4r.v v8, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Same as above but at element 8 (the upper half of the group): the vmv4r |
| ; targets v12 instead of v8. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv8i32_8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv4r.v v12, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Two-register (nxv4i32) subvector inserted at element 0 of a nxv16i32 |
| ; vector: one vmv2r into v8. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv4i32_0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv2r.v v8, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 4 maps to the second register pair of the group: vmv2r into v10. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv4i32_4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv2r.v v10, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 8 maps to the third register pair of the group: vmv2r into v12. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv4i32_8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv2r.v v12, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 12 maps to the last register pair of the group: vmv2r into v14. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv4i32_12: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv2r.v v14, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Single-register (nxv2i32) subvector inserted at element 0 of a nxv16i32 |
| ; vector: one vmv1r into v8, the first register of the group. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_0: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v8, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 2 maps to the second register of the group: vmv1r into v9. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v9, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 4 maps to the third register of the group: vmv1r into v10. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v10, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 6 maps to the fourth register of the group: vmv1r into v11. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_6: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v11, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 8 maps to the fifth register of the group: vmv1r into v12. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v12, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 10 maps to the sixth register of the group: vmv1r into v13. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_10: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v13, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 12 maps to the seventh register of the group: vmv1r into v14. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_12: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v14, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; Element 14 maps to the last register of the group: vmv1r into v15. |
| define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) { |
| ; CHECK-LABEL: insert_nxv16i32_nxv2i32_14: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vmv1r.v v15, v16 |
| ; CHECK-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| ; TODO: Inserts that are smaller than LMUL=1 are not yet supported. In this |
| ; case we need to mask out the unaffected elements (top half of the VR |
| ; %subvec register). |
| ;define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) { |
| ; %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0) |
| ; ret <vscale x 16 x i32> %v |
| ;} |
| |
| ; TODO: Inserts that don't align to a vector register are not yet supported. |
| ; In this case we want to insert the subvector into the upper half of the |
| ; lowest VR subregister in the LMUL group. |
| ;define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) { |
| ; %v = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1) |
| ; ret <vscale x 16 x i32> %v |
| ;} |
| |
| declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32>, <vscale x 2 x i32>, i64 %idx) |
| declare <vscale x 8 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32>, <vscale x 4 x i32>, i64 %idx) |
| |
| declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32>, <vscale x 1 x i32>, i64 %idx) |
| declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32>, <vscale x 2 x i32>, i64 %idx) |
| declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64 %idx) |
| declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32>, <vscale x 8 x i32>, i64 %idx) |