| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -O3 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s |
| |
| ; Baseline: store an all-zero fixed-length <4 x i32> through %p. |
| ; The expected output is one vsetivli with an immediate AVL of 4 |
| ; (e32, m1), a vmv.v.i of 0 into v8, and a single vse32.v. |
| define void @constant_splat_fixed(ptr %p) { |
| ; CHECK-LABEL: constant_splat_fixed: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| store <4 x i32> zeroinitializer, ptr %p |
| ret void |
| } |
| |
| ; Baseline: store an all-zero scalable <vscale x 1 x i32> through %p. |
| ; The expected output configures the vector unit with vsetvli using |
| ; the zero register as the AVL operand (e32, mf2), then emits a |
| ; vmv.v.i of 0 and a single vse32.v. |
| define void @constant_splat_scalable(ptr %p) { |
| ; CHECK-LABEL: constant_splat_scalable: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| store <vscale x 1 x i32> zeroinitializer, ptr %p |
| ret void |
| } |
| |
| ; Store a scalable zero splat to %p, then a fixed-length zero splat |
| ; to %p2. The expected output currently materializes the zero splat |
| ; twice: once under the vsetvli (mf2) configuration and once more |
| ; under the vsetivli (VL=4, m1) configuration. |
| ; FIXME: We should be able to use the earlier splat of zero here |
| ; since VLMAX >= 4. |
| define void @constant_splat_scalable_then_fixed(ptr %p, ptr %p2) { |
| ; CHECK-LABEL: constant_splat_scalable_then_fixed: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a1) |
| ; CHECK-NEXT: ret |
| store <vscale x 1 x i32> zeroinitializer, ptr %p |
| store <4 x i32> zeroinitializer, ptr %p2 |
| ret void |
| } |
| |
| ; Same two zero-splat stores as constant_splat_scalable_then_fixed, |
| ; but in the opposite order: the fixed-length store to %p2 comes |
| ; first, then the scalable store to %p. The expected output again |
| ; emits a separate vmv.v.i for each vector configuration. |
| ; We could widen the first splat to VLMAX, but this might not |
| ; be generally profitable. |
| define void @constant_splat_fixed_then_scalable(ptr %p, ptr %p2) { |
| ; CHECK-LABEL: constant_splat_fixed_then_scalable: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a1) |
| ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| store <4 x i32> zeroinitializer, ptr %p2 |
| store <vscale x 1 x i32> zeroinitializer, ptr %p |
| ret void |
| } |
| |
| ; Baseline: splat the scalar %v across a scalable <vscale x 1 x i32> |
| ; and store it through %p. The expected output is a vmv.v.x from a1 |
| ; under a vsetvli (e32, mf2) configuration, followed by one vse32.v. |
| define void @splat_scalable(ptr %p, i32 %v) { |
| ; CHECK-LABEL: splat_scalable: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a1 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert %v into lane 0, then shuffle with an all-zero mask. |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| store <vscale x 1 x i32> %splat, ptr %p |
| ret void |
| } |
| |
| ; Baseline: splat the scalar %v across a fixed-length <4 x i32> and |
| ; store it through %p. The expected output is a vmv.v.x from a1 under |
| ; a vsetivli (VL=4, e32, m1) configuration, followed by one vse32.v. |
| define void @splat_fixed(ptr %p, i32 %v) { |
| ; CHECK-LABEL: splat_fixed: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a1 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| ; Splat idiom: insert %v into lane 0, then shuffle with an all-zero mask. |
| %elt.head = insertelement <4 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer |
| store <4 x i32> %splat, ptr %p |
| ret void |
| } |
| |
| ; A scalable splat of %v is defined first, but the fixed-length zero |
| ; store to %p precedes the scalable store of that splat to the same |
| ; pointer. The expected output currently contains three vset* |
| ; instructions: the vmv.v.x is emitted up front under the mf2 |
| ; configuration, the zero splat and its store run under VL=4/m1, and |
| ; the configuration is switched back to mf2 for the final store. |
| ; FIXME: We should reschedule the first splat to reduce the need |
| ; for toggling VL |
| define void @mixed_splats1(ptr %p, i32 %v) { |
| ; CHECK-LABEL: mixed_splats1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a1 |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.i v9, 0 |
| ; CHECK-NEXT: vse32.v v9, (a0) |
| ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| |
| store <4 x i32> zeroinitializer, ptr %p |
| store <vscale x 1 x i32> %splat, ptr %p |
| ret void |
| } |
| |
| ; Same values as mixed_splats1 with the two stores to %p swapped: the |
| ; scalable splat of %v is stored first, then the fixed-length zero |
| ; vector. The expected output needs only two vset* instructions, one |
| ; per vector configuration. |
| define void @mixed_splats2(ptr %p, i32 %v) { |
| ; CHECK-LABEL: mixed_splats2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a1 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| |
| store <vscale x 1 x i32> %splat, ptr %p |
| store <4 x i32> zeroinitializer, ptr %p |
| ret void |
| } |
| |
| ; The tests below build a scalable splat of %v and take its leading |
| ; <4 x i32> subvector (index 0) with llvm.vector.extract. |
| |
| ; The fixed-length extract is the only use of the scalable splat. The |
| ; expected output performs the splat directly under the VL=4/m1 |
| ; configuration, so a single vsetivli is needed. |
| define void @extract_vector(ptr %p, i32 %v) { |
| ; CHECK-LABEL: extract_vector: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a1 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| |
| %fv = call <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %splat, i64 0) |
| store <4 x i32> %fv, ptr %p |
| ret void |
| } |
| |
| ; The extracted fixed-length vector is stored twice (to %p and %p2); |
| ; the scalable splat itself still has a single use. The expected output |
| ; has one vsetivli, one vmv.v.x, and two vse32.v stores. |
| define void @extract_vector_multiuse1(ptr %p, ptr %p2, i32 %v) { |
| ; CHECK-LABEL: extract_vector_multiuse1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a2 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: vse32.v v8, (a1) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| |
| %fv = call <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %splat, i64 0) |
| store <4 x i32> %fv, ptr %p |
| store <4 x i32> %fv, ptr %p2 |
| ret void |
| } |
| |
| ; The scalable splat has two uses: the fixed-length extract stored to |
| ; %p, and the function's scalable return value (%p2 is unused). The |
| ; expected output performs the splat under the mf2 configuration and |
| ; then switches to VL=4/m1 for the store. |
| define <vscale x 1 x i32> @extract_vector_multiuse2(ptr %p, ptr %p2, i32 %v) { |
| ; CHECK-LABEL: extract_vector_multiuse2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a2 |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| |
| %fv = call <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %splat, i64 0) |
| store <4 x i32> %fv, ptr %p |
| ret <vscale x 1 x i32> %splat |
| } |
| |
| ; The fixed-length extract is stored to %p before the scalable splat |
| ; is stored to %p2. The expected output contains three vset* |
| ; instructions: mf2 for the splat, VL=4/m1 for the first store, and |
| ; mf2 again for the second store. |
| define void @extract_vector_mixed1(ptr %p, ptr %p2, i32 %v) { |
| ; CHECK-LABEL: extract_vector_mixed1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a2 |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vse32.v v8, (a1) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| |
| %fv = call <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %splat, i64 0) |
| |
| store <4 x i32> %fv, ptr %p |
| store <vscale x 1 x i32> %splat, ptr %p2 |
| ret void |
| } |
| |
| ; The scalable splat is stored to %p first; the extract is taken |
| ; afterwards and stored to %p2. The expected output needs only two |
| ; vset* instructions and reuses v8 for both stores. |
| define void @extract_vector_mixed2(ptr %p, ptr %p2, i32 %v) { |
| ; CHECK-LABEL: extract_vector_mixed2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a2 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vse32.v v8, (a1) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| store <vscale x 1 x i32> %splat, ptr %p |
| |
| %fv = call <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %splat, i64 0) |
| store <4 x i32> %fv, ptr %p2 |
| ret void |
| } |
| |
| ; Same stores in the same order as extract_vector_mixed2, but the |
| ; extract call appears before the scalable store in the IR. The |
| ; expected instruction sequence matches extract_vector_mixed2. |
| define void @extract_vector_mixed3(ptr %p, ptr %p2, i32 %v) { |
| ; CHECK-LABEL: extract_vector_mixed3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a3, zero, e32, mf2, ta, ma |
| ; CHECK-NEXT: vmv.v.x v8, a2 |
| ; CHECK-NEXT: vse32.v v8, (a0) |
| ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma |
| ; CHECK-NEXT: vse32.v v8, (a1) |
| ; CHECK-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %v, i32 0 |
| %splat = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| %fv = call <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %splat, i64 0) |
| |
| store <vscale x 1 x i32> %splat, ptr %p |
| store <4 x i32> %fv, ptr %p2 |
| ret void |
| } |
| |
| |
| ; Overloaded on the fixed-length result type (v4i32) and the scalable |
| ; source vector type (nxv1i32); the name suffixes must match both. |
| declare <4 x i32> @llvm.vector.extract.v4i32.nxv1i32(<vscale x 1 x i32> %vec, i64 %idx) |