| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s |
| |
| declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) |
| declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32) |
| declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32) |
| declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) |
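| |
| ; The tests below check that a vp.merge / vp.select / vmerge.vvm whose true operand |
| ; is produced by another VL-predicated or RVV intrinsic operation is folded into that |
| ; operation (becoming a masked instruction), so that no separate vmerge.vvm is |
| ; emitted where the fold is legal. |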
| |
| ; Test binary operator with vp.merge and vp.add. |
| declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test that the glued node of the merge is not deleted. |
| declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpadd2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma |
| ; CHECK-NEXT: vmseq.vv v0, v9, v10 |
| ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test vp.merge with an all-ones mask. |
| define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpadd3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vadd.vv v8, v9, v10 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test float binary operator with vp.merge and vp.fadd. |
| declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpfadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test a binary operator with a specific EEW, riscv.vrgatherei16. |
| declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64) |
| define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vrgatherei16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma |
| ; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1) |
| %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %2 |
| } |
| |
| ; Test conversion by fptosi. |
| declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpfptosi: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu |
| ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) |
| ret <vscale x 2 x i16> %b |
| } |
| |
| ; Test conversion by sitofp. |
| declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpsitofp: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test integer extension by vp.zext. |
| declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpzext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test integer truncation by vp.trunc. |
| declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vptrunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test float extension by vp.fpext. |
| declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpfpext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) |
| ret <vscale x 2 x double> %b |
| } |
| |
| ; Test float truncation by vp.fptrunc. |
| declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpfptrunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test load operation by vp.load. |
| declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32) |
| define <vscale x 2 x i32> @vpmerge_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpload: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu |
| ; CHECK-NEXT: vle32.v v8, (a0), v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test when the result has both a chain and a glued node. |
| define <vscale x 2 x i32> @vpmerge_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpload2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma |
| ; CHECK-NEXT: vmseq.vv v0, v9, v10 |
| ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu |
| ; CHECK-NEXT: vle32.v v8, (a0), v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test when the true operand of vmerge.vvm has a chain output. |
| define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vpload_store: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu |
| ; CHECK-NEXT: vle32.v v8, (a0), v0.t |
| ; CHECK-NEXT: vs1r.v v8, (a0) |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| store <vscale x 2 x i32> %b, ptr %p |
| ret void |
| } |
| |
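| ; Test fault-only-first load by riscv.vleff |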
| declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, ptr, i64) |
| define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vleff: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu |
| ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1) |
| %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0 |
| %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %c |
| } |
| |
| ; Test strided load by riscv.vlse |
| declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32>, ptr, i64, i64) |
| define <vscale x 2 x i32> @vpmerge_vlse(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vlse: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu |
| ; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test indexed load by riscv.vluxei |
| declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32>, ptr, <vscale x 2 x i64>, i64) |
| define <vscale x 2 x i32> @vpmerge_vluxei(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vluxei: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu |
| ; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test vector index by riscv.vid |
| declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32>, i64) |
| define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vid: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vid.v v8, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test that VIOTA_M and VMERGE_VVM are not combined when the merge mask is not |
| ; all-ones, since viota.m computes a different result when run under a mask. |
| declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64) |
| define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_viota: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma |
| ; CHECK-NEXT: viota.m v10, v9 |
| ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma |
| ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test that VIOTA_M and VMERGE_VVM are combined when the merge mask is all-ones. |
| define <vscale x 2 x i32> @vpmerge_viota2(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %vm, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_viota2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: viota.m v8, v0 |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test riscv.vfclass |
| declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64) |
| define <vscale x 2 x i32> @vpmerge_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vfclass: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfclass.v v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test riscv.vfsqrt |
| declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64) |
| define <vscale x 2 x float> @vpmerge_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vfsqrt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test reciprocal operation by riscv.vfrec7 |
| declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64, i64) |
| define <vscale x 2 x float> @vpmerge_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vfrec7: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfrec7.v v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test vector operations with VLMAX vector length. |
| |
| ; Test binary operator with vp.merge and add. |
| define <vscale x 2 x i32> @vpmerge_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_add: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = add <vscale x 2 x i32> %x, %y |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test binary operator with vp.merge and fadd. |
| define <vscale x 2 x float> @vpmerge_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_fadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = fadd <vscale x 2 x float> %x, %y |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; This shouldn't be folded because we need to preserve exceptions with |
| ; "fpexcept.strict" exception behaviour, and masking may hide them. |
| define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) strictfp { |
| ; CHECK-LABEL: vpmerge_constrained_fadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma |
| ; CHECK-NEXT: vfadd.vv v9, v9, v10 |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp |
| %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) strictfp |
| ret <vscale x 2 x float> %b |
| } |
| declare <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata) |
| declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64) |
| |
| ; This shouldn't be folded because we need to preserve exceptions with |
| ; "fpexcept.strict" exception behaviour, and masking may hide them. |
| define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp { |
| ; CHECK-LABEL: vpmerge_constrained_fadd_vlmax: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma |
| ; CHECK-NEXT: vfadd.vv v9, v9, v10 |
| ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma |
| ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp |
| %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test conversion by fptosi. |
| define <vscale x 2 x i16> @vpmerge_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_fptosi: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu |
| ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16> |
| %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) |
| ret <vscale x 2 x i16> %b |
| } |
| |
| ; Test conversion by sitofp. |
| define <vscale x 2 x float> @vpmerge_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_sitofp: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float> |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test float extension by fpext. |
| define <vscale x 2 x double> @vpmerge_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_fpext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double> |
| %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) |
| ret <vscale x 2 x double> %b |
| } |
| |
| ; Test float truncation by fptrunc. |
| define <vscale x 2 x float> @vpmerge_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_fptrunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float> |
| %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test integer extension by zext. |
| define <vscale x 2 x i32> @vpmerge_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_zext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32> |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test integer truncation by trunc. |
| define <vscale x 2 x i32> @vpmerge_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_trunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t |
| ; CHECK-NEXT: ret |
| %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32> |
| %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) |
| declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32) |
| declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32) |
| declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32) |
| |
| ; Test binary operator with vp.select and vp.add. |
| define <vscale x 2 x i32> @vpselect_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test that the glued node of the select is not deleted. |
| define <vscale x 2 x i32> @vpselect_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpadd2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vmseq.vv v0, v9, v10 |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test vp.select with an all-ones mask. |
| define <vscale x 2 x i32> @vpselect_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpadd3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma |
| ; CHECK-NEXT: vadd.vv v8, v9, v10 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test float binary operator with vp.select and vp.fadd. |
| define <vscale x 2 x float> @vpselect_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpfadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test a binary operator with a specific EEW, riscv.vrgatherei16. |
| define <vscale x 2 x i32> @vpselect_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vrgatherei16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma |
| ; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1) |
| %3 = tail call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %2 |
| } |
| |
| ; Test conversion by fptosi. |
| define <vscale x 2 x i16> @vpselect_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpfptosi: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu |
| ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) |
| ret <vscale x 2 x i16> %b |
| } |
| |
| ; Test conversion by sitofp. |
| define <vscale x 2 x float> @vpselect_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpsitofp: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test integer extension by vp.zext. |
| define <vscale x 2 x i32> @vpselect_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpzext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test integer truncation by vp.trunc. |
| define <vscale x 2 x i32> @vpselect_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vptrunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test float extension by vp.fpext. |
| define <vscale x 2 x double> @vpselect_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpfpext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) |
| ret <vscale x 2 x double> %b |
| } |
| |
| ; Test float truncation by vp.fptrunc. |
| define <vscale x 2 x float> @vpselect_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpfptrunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test load operation by vp.load. |
| define <vscale x 2 x i32> @vpselect_vpload(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpload: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vle32.v v8, (a0), v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test when the result has both a chain and a glued node. |
| define <vscale x 2 x i32> @vpselect_vpload2(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpload2: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vmseq.vv v0, v9, v10 |
| ; CHECK-NEXT: vle32.v v8, (a0), v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test when the true operand of the vp.select has a chain output. |
| define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vpload_store: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vle32.v v8, (a0), v0.t |
| ; CHECK-NEXT: vs1r.v v8, (a0) |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| store <vscale x 2 x i32> %b, ptr %p |
| ret void |
| } |
| |
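| ; Test fault-only-first load by riscv.vleff |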
| define <vscale x 2 x i32> @vpselect_vleff(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vleff: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %1) |
| %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0 |
| %c = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %c |
| } |
| |
| ; Test strided load by riscv.vlse |
| define <vscale x 2 x i32> @vpselect_vlse(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vlse: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu |
| ; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, ptr %p, i64 %s, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test indexed load by riscv.vluxei |
| define <vscale x 2 x i32> @vpselect_vluxei(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vluxei: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu |
| ; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, ptr %p, <vscale x 2 x i64> %idx, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test vector index by riscv.vid |
| define <vscale x 2 x i32> @vpselect_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vid: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vid.v v8, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test riscv.viota |
| define <vscale x 2 x i32> @vpselect_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_viota: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma |
| ; CHECK-NEXT: viota.m v10, v9 |
| ; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test riscv.vfclass |
| define <vscale x 2 x i32> @vpselect_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vfclass: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfclass.v v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test riscv.vfsqrt |
| define <vscale x 2 x float> @vpselect_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vfsqrt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfsqrt.v v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test reciprocal operation by riscv.vfrec7 |
| define <vscale x 2 x float> @vpselect_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vfrec7: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfrec7.v v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 7, i64 %1) |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test slides |
| declare <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64) |
| define <vscale x 2 x i32> @vpselect_vslideup(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vslideup: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vslideup.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| declare <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64, i64, i64) |
| define <vscale x 2 x i32> @vpselect_vslidedown(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i64 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vslidedown: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vslidedown.vx v8, v9, a0, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vslidedown.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i64 %x, i64 %1, i64 0) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| declare <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64) |
| define <vscale x 2 x i32> @vpselect_vslide1up(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vslide1up: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vslide1up.vx v8, v9, a0, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vslide1up.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| declare <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64) |
| define <vscale x 2 x i32> @vpselect_vslide1down(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_vslide1down: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu |
| ; CHECK-NEXT: vslide1down.vx v8, v9, a0, v0.t |
| ; CHECK-NEXT: ret |
| %1 = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1) |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test vector operations with VLMAX vector length. |
| |
| ; Test binary operator with vp.select and add. |
| define <vscale x 2 x i32> @vpselect_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_add: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = add <vscale x 2 x i32> %x, %y |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test binary operator with vp.select and fadd. |
| define <vscale x 2 x float> @vpselect_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_fadd: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = fadd <vscale x 2 x float> %x, %y |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test conversion by fptosi. |
| define <vscale x 2 x i16> @vpselect_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_fptosi: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu |
| ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16> |
| %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl) |
| ret <vscale x 2 x i16> %b |
| } |
| |
| ; Test conversion by sitofp. |
| define <vscale x 2 x float> @vpselect_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_sitofp: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float> |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test float extension by fpext. |
| define <vscale x 2 x double> @vpselect_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_fpext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double> |
| %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) |
| ret <vscale x 2 x double> %b |
| } |
| |
| ; Test float truncation by fptrunc. |
| define <vscale x 2 x float> @vpselect_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_fptrunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float> |
| %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; Test integer extension by zext. |
| define <vscale x 2 x i32> @vpselect_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_zext: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vzext.vf4 v8, v9, v0.t |
| ; CHECK-NEXT: ret |
| %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32> |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test integer truncation by trunc. |
| define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpselect_trunc: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t |
| ; CHECK-NEXT: ret |
| %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32> |
| %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Folding this would create a loop in the DAG because the chain from the VLE is |
| ; used by the vssubu. |
| define void @test_dag_loop() { |
| ; CHECK-LABEL: test_dag_loop: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: vsetivli zero, 0, e8, m4, ta, ma |
| ; CHECK-NEXT: vmclr.m v0 |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vmv.v.i v12, 0 |
| ; CHECK-NEXT: vsetvli zero, zero, e8, m4, tu, mu |
| ; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t |
| ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma |
| ; CHECK-NEXT: vmseq.vv v0, v12, v8 |
| ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma |
| ; CHECK-NEXT: vmv.v.i v8, 0 |
| ; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu |
| ; CHECK-NEXT: vle16.v v8, (zero), v0.t |
| ; CHECK-NEXT: vse16.v v8, (zero) |
| ; CHECK-NEXT: ret |
| entry: |
| %0 = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, ptr null, i64 1) |
| %1 = tail call <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8> zeroinitializer, <vscale x 32 x i8> zeroinitializer, i8 0, <vscale x 32 x i1> zeroinitializer, i64 0, i64 0) |
| %2 = tail call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> %1, <vscale x 32 x i8> zeroinitializer, i64 0) |
| %3 = tail call <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> zeroinitializer, <vscale x 32 x i16> %0, <vscale x 32 x i1> %2, i64 1) |
| call void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16> %3, ptr null, i64 0) |
| ret void |
| } |
| |
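| ; Test that the vmerge is folded into a masked vaaddu whose destination is the |
| ; vmerge's false operand (%var_0), so inactive elements keep %var_0. |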
| define <vscale x 1 x i16> @test_vaaddu(<vscale x 1 x i16> %var_11, i16 zeroext %var_9, <vscale x 1 x i1> %var_5, <vscale x 1 x i16> %var_0) { |
| ; CHECK-LABEL: test_vaaddu: |
| ; CHECK: # %bb.0: # %entry |
| ; CHECK-NEXT: csrwi vxrm, 0 |
| ; CHECK-NEXT: vsetivli zero, 3, e16, mf4, ta, mu |
| ; CHECK-NEXT: vaaddu.vx v9, v8, a0, v0.t |
| ; CHECK-NEXT: vmv1r.v v8, v9 |
| ; CHECK-NEXT: ret |
| entry: |
| %0 = tail call <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_11, i16 %var_9, i64 0, i64 3) |
| %1 = tail call <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> poison, <vscale x 1 x i16> %var_0, <vscale x 1 x i16> %0, <vscale x 1 x i1> %var_5, i64 3) |
| ret <vscale x 1 x i16> %1 |
| } |
| |
| ; Test reductions don't have a vmerge folded into them, since the mask affects |
| ; the result. |
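| ; (A masked reduction only accumulates the active elements, so folding the merge |
| ; mask into vredsum.vs/vfredusum.vs would change the computed scalar.) |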
| |
| declare <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( |
| <vscale x 2 x i32>, |
| <vscale x 2 x i32>, |
| <vscale x 2 x i32>, |
| i64) |
| |
| define <vscale x 2 x i32> @vredsum(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %vl) { |
| ; CHECK-LABEL: vredsum: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vmv1r.v v11, v8 |
| ; CHECK-NEXT: vredsum.vs v11, v9, v10 |
| ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %x, |
| <vscale x 2 x i32> %y, |
| i64 %vl) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32( |
| <vscale x 2 x float>, |
| <vscale x 2 x float>, |
| <vscale x 2 x float>, |
| i64, i64) |
| |
| define <vscale x 2 x float> @vfredusum(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i64 %vl) { |
| ; CHECK-LABEL: vfredusum: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: fsrmi a1, 0 |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vmv1r.v v11, v8 |
| ; CHECK-NEXT: vfredusum.vs v11, v9, v10 |
| ; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 |
| ; CHECK-NEXT: fsrm a1 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32( |
| <vscale x 2 x float> %passthru, |
| <vscale x 2 x float> %x, |
| <vscale x 2 x float> %y, |
| i64 0, i64 %vl) |
| %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
| ; However we can fold it in if the mask is all ones. |
| define <vscale x 2 x i32> @vredsum_allones_mask(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %vl) { |
| ; CHECK-LABEL: vredsum_allones_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vredsum.vs v8, v9, v10 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %x, |
| <vscale x 2 x i32> %y, |
| i64 %vl) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| define <vscale x 2 x float> @vfredusum_allones_mask(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, i64 %vl) { |
| ; CHECK-LABEL: vfredusum_allones_mask: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: fsrmi a1, 0 |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vfredusum.vs v8, v9, v10 |
| ; CHECK-NEXT: fsrm a1 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32( |
| <vscale x 2 x float> %passthru, |
| <vscale x 2 x float> %x, |
| <vscale x 2 x float> %y, |
| i64 0, i64 %vl) |
| %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl) |
| ret <vscale x 2 x float> %b |
| } |
| |
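| ; Can't fold: the reduction runs at VLMAX while the vmerge uses VL=1, and shrinking |
| ; the reduction's VL would sum fewer elements and change the result. |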
| define <vscale x 2 x i32> @unfoldable_vredsum_allones_mask_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y) { |
| ; CHECK-LABEL: unfoldable_vredsum_allones_mask_diff_vl: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma |
| ; CHECK-NEXT: vmv1r.v v11, v8 |
| ; CHECK-NEXT: vredsum.vs v11, v9, v10 |
| ; CHECK-NEXT: vsetivli zero, 1, e32, m1, tu, ma |
| ; CHECK-NEXT: vmv.v.v v8, v11 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vredsum.nxv2i32.nxv2i32( |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %x, |
| <vscale x 2 x i32> %y, |
| i64 -1) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 1) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64) |
| declare <vscale x 32 x i8> @llvm.riscv.vssubu.mask.nxv32i8.i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, i64, i64 immarg) |
| declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>, <vscale x 32 x i8>, i64) |
| declare <vscale x 32 x i16> @llvm.riscv.vmerge.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i64) |
| declare void @llvm.riscv.vse.nxv32i16.i64(<vscale x 32 x i16>, ptr nocapture, i64) |
| declare <vscale x 1 x i16> @llvm.riscv.vaaddu.nxv1i16.i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, i16, i64 immarg, i64) |
| declare <vscale x 1 x i16> @llvm.riscv.vmerge.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i64) |
| |
| ; Tests for folding vmerge into its ops when their VLs differ |
| |
| declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, i64) |
| declare <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i64) |
| |
| ; Can fold with VL=2 |
| define <vscale x 2 x i32> @vmerge_smaller_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_smaller_vl_same_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 4) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Can fold with VL=2 |
| define <vscale x 2 x i32> @vmerge_larger_vl_same_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_larger_vl_same_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Legal to fold with VL=2, but the vmerge is not folded below since the vadd's |
| ; passthru (%pt1) differs from the vmerge's (%pt2). |
| define <vscale x 2 x i32> @vmerge_smaller_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_smaller_vl_different_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma |
| ; CHECK-NEXT: vadd.vv v8, v10, v11 |
| ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 |
| ; CHECK-NEXT: vmv1r.v v8, v9 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Can't fold this because we need to take elements from both %pt1 and %pt2 |
| define <vscale x 2 x i32> @vmerge_larger_vl_different_passthru(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_larger_vl_different_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma |
| ; CHECK-NEXT: vadd.vv v8, v10, v11 |
| ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma |
| ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 |
| ; CHECK-NEXT: vmv1r.v v8, v9 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt1, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %pt2, <vscale x 2 x i32> %pt2, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Can fold with VL=2 |
| define <vscale x 2 x i32> @vmerge_smaller_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_smaller_vl_poison_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 3) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 2) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Can fold with VL=2 |
| define <vscale x 2 x i32> @vmerge_larger_vl_poison_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_larger_vl_poison_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; The vadd's new policy should be tail undisturbed since the false op of the |
| ; vmerge moves from the body to the tail, and we need to preserve it. |
| define <vscale x 2 x i32> @vmerge_larger_vl_false_becomes_tail(<vscale x 2 x i32> %false, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m) { |
| ; CHECK-LABEL: vmerge_larger_vl_false_becomes_tail: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 2) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> poison, <vscale x 2 x i32> %false, <vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i64 3) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| ; Test widening pseudos with their TIED variant (passthru same as first op). |
| define <vscale x 2 x i64> @vpmerge_vwsub.w_tied(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vwsub.w_tied: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vwsub.wv v8, v8, v12, v0.t |
| ; CHECK-NEXT: ret |
| %vl.zext = zext i32 %vl to i64 |
| %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(<vscale x 2 x i64> %passthru, <vscale x 2 x i64> %passthru, <vscale x 2 x i32> %y, i64 %vl.zext) |
| %b = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %passthru, i32 %vl) |
| ret <vscale x 2 x i64> %b |
| } |
| |
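| ; Likewise for the floating-point widening pseudo (vfwsub.w). |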
| define <vscale x 2 x double> @vpmerge_vfwsub.w_tied(<vscale x 2 x double> %passthru, <vscale x 2 x double> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 zeroext %vl) { |
| ; CHECK-LABEL: vpmerge_vfwsub.w_tied: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: fsrmi a1, 1 |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu |
| ; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t |
| ; CHECK-NEXT: fsrm a1 |
| ; CHECK-NEXT: ret |
| %vl.zext = zext i32 %vl to i64 |
| %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(<vscale x 2 x double> %passthru, <vscale x 2 x double> %passthru, <vscale x 2 x float> %y, i64 1, i64 %vl.zext) |
| %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl) |
| ret <vscale x 2 x double> %b |
| } |
| |
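| ; Test folding into a tied-dest pseudo (vmacc) when the vmerge's passthru is poison. |
| ; The masked-off lanes keep %passthru, which is already the vmacc's tied dest. |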
| define <vscale x 2 x i32> @true_tied_dest_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) { |
| ; CHECK-LABEL: true_tied_dest_vmerge_implicit_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vmacc.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vmacc.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i64 %avl, i64 0) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( |
| <vscale x 2 x i32> poison, |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %a, |
| <vscale x 2 x i1> %m, |
| i64 %avl |
| ) |
| ret <vscale x 2 x i32> %b |
| } |
| |
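| ; Test an all-ones vmerge mask with a poison passthru; only the masked vadd remains. |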
| define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl) { |
| ; CHECK-LABEL: true_mask_vmerge_implicit_passthru: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu |
| ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 2 x i32> @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i64 %avl, i64 0) |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( |
| <vscale x 2 x i32> poison, |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %a, |
| <vscale x 2 x i1> shufflevector(<vscale x 2 x i1> insertelement(<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), |
| i64 %avl |
| ) |
| ret <vscale x 2 x i32> %b |
| } |
| |
| |
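| ; The true operand is bitcast from a different element width (SEW), so the vmerge can't be folded. |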
| define <vscale x 2 x i32> @unfoldable_mismatched_sew(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) { |
| ; CHECK-LABEL: unfoldable_mismatched_sew: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma |
| ; CHECK-NEXT: vadd.vv v9, v9, v10 |
| ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma |
| ; CHECK-NEXT: vmv.v.v v8, v9 |
| ; CHECK-NEXT: ret |
| %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl) |
| %a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32> |
| %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32( |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %passthru, |
| <vscale x 2 x i32> %a.bitcast, |
| <vscale x 2 x i1> splat (i1 true), |
| i64 %avl |
| ) |
| ret <vscale x 2 x i32> %b |
| } |