| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V |
| ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V |
| ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v,+zvbc < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVBC,RV32ZVBC64 |
| ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v,+zvbc < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVBC,RV64ZVBC64 |
| ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v,+experimental-zvbc32e < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVBC,RV32ZVBC32 |
| ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v,+experimental-zvbc32e < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVBC,RV64ZVBC32 |
| |
| ; Vector-vector llvm.clmul on <vscale x 1 x i8> operands (e8, mf8). |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect an expansion that |
| ;   ANDs v9 with each single-bit mask 1, 2, 4, ..., 128, multiplies v8 by the |
| ;   masked value with vmul.vv and XORs the eight partial products together. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) both operands are |
| ;   zero-extended to e64/m1 with vzext.vf8, combined with vclmul.vv, and the |
| ;   result is narrowed back to e8 by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) a single |
| ;   vclmul.vv at e8 is expected. |
| define <vscale x 1 x i8> @clmul_nxv1i8_vv(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i8_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i8_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i8_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v10, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v9, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v9, v10 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i8_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v10, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v9, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v9, v10 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i8_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i8_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, mf8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 1 x i8> @llvm.clmul.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) |
| ret <vscale x 1 x i8> %v |
| } |
| |
| ; Vector-scalar llvm.clmul on <vscale x 1 x i8> (e8, mf8): the i8 scalar %b is |
| ; splatted in IR and used as the second operand. |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect the splat to be |
| ;   materialized with vmv.v.x into v9, followed by the vand/vmul.vv/vxor.vv |
| ;   expansion over the single-bit masks 1, 2, 4, ..., 128. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) the splat is still built |
| ;   with vmv.v.x at e8, then both operands are zero-extended to e64/m1 with |
| ;   vzext.vf8, combined with vclmul.vv, and narrowed by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) the scalar |
| ;   in a0 is consumed directly by a single vclmul.vx at e8. |
| define <vscale x 1 x i8> @clmul_nxv1i8_vx(<vscale x 1 x i8> %va, i8 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i8_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e8, mf8, ta, ma |
| ; RV32V-NEXT: vmv.v.x v9, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i8_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e8, mf8, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i8_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v9, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v10, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v8, v9 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i8_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, mf8, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v9, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v10, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v8, v9 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i8_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i8_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, mf8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| ; Splat %b: insert it at index 0, then shuffle with an all-zero mask. |
| %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 |
| %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer |
| %v = call <vscale x 1 x i8> @llvm.clmul.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) |
| ret <vscale x 1 x i8> %v |
| } |
| |
| ; Vector-vector llvm.clmul on <vscale x 2 x i8> operands (e8, mf4). |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect an expansion that |
| ;   ANDs v9 with each single-bit mask 1, 2, 4, ..., 128, multiplies v8 by the |
| ;   masked value with vmul.vv and XORs the eight partial products together. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) both operands are |
| ;   zero-extended to e64/m2 with vzext.vf8, combined with vclmul.vv, and the |
| ;   result is narrowed back to e8 by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) a single |
| ;   vclmul.vv at e8 is expected. |
| define <vscale x 2 x i8> @clmul_nxv2i8_vv(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i8_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e8, mf4, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i8_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e8, mf4, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i8_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v10, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v12, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v12, v10 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i8_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v10, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v12, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v12, v10 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i8_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i8_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, mf4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 2 x i8> @llvm.clmul.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) |
| ret <vscale x 2 x i8> %v |
| } |
| |
| ; Vector-scalar llvm.clmul on <vscale x 2 x i8> (e8, mf4): the i8 scalar %b is |
| ; splatted in IR and used as the second operand. |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect the splat to be |
| ;   materialized with vmv.v.x into v9, followed by the vand/vmul.vv/vxor.vv |
| ;   expansion over the single-bit masks 1, 2, 4, ..., 128. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) the splat is still built |
| ;   with vmv.v.x at e8, then both operands are zero-extended to e64/m2 with |
| ;   vzext.vf8, combined with vclmul.vv, and narrowed by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) the scalar |
| ;   in a0 is consumed directly by a single vclmul.vx at e8. |
| define <vscale x 2 x i8> @clmul_nxv2i8_vx(<vscale x 2 x i8> %va, i8 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i8_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e8, mf4, ta, ma |
| ; RV32V-NEXT: vmv.v.x v9, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i8_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e8, mf4, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i8_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v10, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v8, v12 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i8_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v10, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v8, v12 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i8_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i8_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| ; Splat %b: insert it at index 0, then shuffle with an all-zero mask. |
| %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 |
| %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer |
| %v = call <vscale x 2 x i8> @llvm.clmul.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) |
| ret <vscale x 2 x i8> %v |
| } |
| |
| ; Vector-vector llvm.clmul on <vscale x 4 x i8> operands (e8, mf2). |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect an expansion that |
| ;   ANDs v9 with each single-bit mask 1, 2, 4, ..., 128, multiplies v8 by the |
| ;   masked value with vmul.vv and XORs the eight partial products together. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) both operands are |
| ;   zero-extended to e64/m4 with vzext.vf8, combined with vclmul.vv, and the |
| ;   result is narrowed back to e8 by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) a single |
| ;   vclmul.vv at e8 is expected. |
| define <vscale x 4 x i8> @clmul_nxv4i8_vv(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i8_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e8, mf2, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i8_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e8, mf2, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i8_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v12, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v16, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v16, v12 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i8_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v12, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v16, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v16, v12 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i8_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i8_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, mf2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 4 x i8> @llvm.clmul.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) |
| ret <vscale x 4 x i8> %v |
| } |
| |
| ; Vector-scalar llvm.clmul on <vscale x 4 x i8> (e8, mf2): the i8 scalar %b is |
| ; splatted in IR and used as the second operand. |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect the splat to be |
| ;   materialized with vmv.v.x into v9, followed by the vand/vmul.vv/vxor.vv |
| ;   expansion over the single-bit masks 1, 2, 4, ..., 128. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) the splat is still built |
| ;   with vmv.v.x at e8, then both operands are zero-extended to e64/m4 with |
| ;   vzext.vf8, combined with vclmul.vv, and narrowed by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) the scalar |
| ;   in a0 is consumed directly by a single vclmul.vx at e8. |
| define <vscale x 4 x i8> @clmul_nxv4i8_vx(<vscale x 4 x i8> %va, i8 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i8_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma |
| ; RV32V-NEXT: vmv.v.x v9, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i8_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i8_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v12, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v8, v16 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v12, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i8_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v12, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v8, v16 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v12, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i8_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i8_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| ; Splat %b: insert it at index 0, then shuffle with an all-zero mask. |
| %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 |
| %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer |
| %v = call <vscale x 4 x i8> @llvm.clmul.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) |
| ret <vscale x 4 x i8> %v |
| } |
| |
| ; Vector-vector llvm.clmul on <vscale x 8 x i8> operands (e8, m1). |
| ; - With only +v (prefixes RV32V and RV64V) the checks expect an expansion that |
| ;   ANDs v9 with each single-bit mask 1, 2, 4, ..., 128, multiplies v8 by the |
| ;   masked value with vmul.vv and XORs the eight partial products together. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) both operands are |
| ;   zero-extended to e64/m8 with vzext.vf8, combined with vclmul.vv, and the |
| ;   result is narrowed back to e8 by three vnsrl.wi steps. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) a single |
| ;   vclmul.vv at e8 is expected. |
| define <vscale x 8 x i8> @clmul_nxv8i8_vv(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i8_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e8, m1, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv8i8_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e8, m1, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv8i8_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf8 v16, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf8 v24, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v24, v16 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v16, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v10, v16, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv8i8_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf8 v16, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf8 v24, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v24, v16 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v16, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v10, v16, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i8_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i8_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 8 x i8> @llvm.clmul.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) |
| ret <vscale x 8 x i8> %v |
| } |
| |
| ; clmul of <vscale x 8 x i8> %va by the scalar %b broadcast to every lane.
| ; Expected codegen, per the check lines below -
| ;   RV32V, RV64V - a vand / vmul.vv / vxor.vv expansion over the single-bit
| ;     masks 1, 2, 4, 8, 16, 32, 64 and 128 of the broadcast operand, at e8/m1.
| ;   RV32ZVBC64, RV64ZVBC64 - both operands are zero-extended to e64/m8 with
| ;     vzext.vf8, multiplied with vclmul.vv, then narrowed back to e8/m1 by
| ;     three vnsrl.wi steps (e32, e16, e8).
| ;   RV32ZVBC32, RV64ZVBC32 - a single vclmul.vx at e8/m1 taking the scalar
| ;     directly from a0.
| define <vscale x 8 x i8> @clmul_nxv8i8_vx(<vscale x 8 x i8> %va, i8 %b) nounwind {
| ; RV32V-LABEL: clmul_nxv8i8_vx:
| ; RV32V: # %bb.0:
| ; RV32V-NEXT: vsetvli a1, zero, e8, m1, ta, ma
| ; RV32V-NEXT: vmv.v.x v9, a0
| ; RV32V-NEXT: li a0, 16
| ; RV32V-NEXT: vand.vi v10, v9, 2
| ; RV32V-NEXT: vand.vi v11, v9, 1
| ; RV32V-NEXT: vmul.vv v10, v8, v10
| ; RV32V-NEXT: vmul.vv v11, v8, v11
| ; RV32V-NEXT: vxor.vv v10, v11, v10
| ; RV32V-NEXT: vand.vi v11, v9, 4
| ; RV32V-NEXT: vmul.vv v11, v8, v11
| ; RV32V-NEXT: vxor.vv v10, v10, v11
| ; RV32V-NEXT: vand.vi v11, v9, 8
| ; RV32V-NEXT: vmul.vv v11, v8, v11
| ; RV32V-NEXT: vxor.vv v10, v10, v11
| ; RV32V-NEXT: vand.vx v11, v9, a0
| ; RV32V-NEXT: li a0, 32
| ; RV32V-NEXT: vmul.vv v11, v8, v11
| ; RV32V-NEXT: vxor.vv v10, v10, v11
| ; RV32V-NEXT: vand.vx v11, v9, a0
| ; RV32V-NEXT: li a0, 64
| ; RV32V-NEXT: vmul.vv v11, v8, v11
| ; RV32V-NEXT: vxor.vv v10, v10, v11
| ; RV32V-NEXT: vand.vx v11, v9, a0
| ; RV32V-NEXT: li a0, 128
| ; RV32V-NEXT: vand.vx v9, v9, a0
| ; RV32V-NEXT: vmul.vv v11, v8, v11
| ; RV32V-NEXT: vxor.vv v10, v10, v11
| ; RV32V-NEXT: vmul.vv v8, v8, v9
| ; RV32V-NEXT: vxor.vv v8, v10, v8
| ; RV32V-NEXT: ret
| ;
| ; RV64V-LABEL: clmul_nxv8i8_vx:
| ; RV64V: # %bb.0:
| ; RV64V-NEXT: vsetvli a1, zero, e8, m1, ta, ma
| ; RV64V-NEXT: vmv.v.x v9, a0
| ; RV64V-NEXT: li a0, 16
| ; RV64V-NEXT: vand.vi v10, v9, 2
| ; RV64V-NEXT: vand.vi v11, v9, 1
| ; RV64V-NEXT: vmul.vv v10, v8, v10
| ; RV64V-NEXT: vmul.vv v11, v8, v11
| ; RV64V-NEXT: vxor.vv v10, v11, v10
| ; RV64V-NEXT: vand.vi v11, v9, 4
| ; RV64V-NEXT: vmul.vv v11, v8, v11
| ; RV64V-NEXT: vxor.vv v10, v10, v11
| ; RV64V-NEXT: vand.vi v11, v9, 8
| ; RV64V-NEXT: vmul.vv v11, v8, v11
| ; RV64V-NEXT: vxor.vv v10, v10, v11
| ; RV64V-NEXT: vand.vx v11, v9, a0
| ; RV64V-NEXT: li a0, 32
| ; RV64V-NEXT: vmul.vv v11, v8, v11
| ; RV64V-NEXT: vxor.vv v10, v10, v11
| ; RV64V-NEXT: vand.vx v11, v9, a0
| ; RV64V-NEXT: li a0, 64
| ; RV64V-NEXT: vmul.vv v11, v8, v11
| ; RV64V-NEXT: vxor.vv v10, v10, v11
| ; RV64V-NEXT: vand.vx v11, v9, a0
| ; RV64V-NEXT: li a0, 128
| ; RV64V-NEXT: vand.vx v9, v9, a0
| ; RV64V-NEXT: vmul.vv v11, v8, v11
| ; RV64V-NEXT: vxor.vv v10, v10, v11
| ; RV64V-NEXT: vmul.vv v8, v8, v9
| ; RV64V-NEXT: vxor.vv v8, v10, v8
| ; RV64V-NEXT: ret
| ;
| ; RV32ZVBC64-LABEL: clmul_nxv8i8_vx:
| ; RV32ZVBC64: # %bb.0:
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, m1, ta, ma
| ; RV32ZVBC64-NEXT: vmv.v.x v24, a0
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
| ; RV32ZVBC64-NEXT: vzext.vf8 v16, v8
| ; RV32ZVBC64-NEXT: vzext.vf8 v8, v24
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v16, v8
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
| ; RV32ZVBC64-NEXT: vnsrl.wi v16, v8, 0
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
| ; RV32ZVBC64-NEXT: vnsrl.wi v10, v16, 0
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0
| ; RV32ZVBC64-NEXT: ret
| ;
| ; RV64ZVBC64-LABEL: clmul_nxv8i8_vx:
| ; RV64ZVBC64: # %bb.0:
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, m1, ta, ma
| ; RV64ZVBC64-NEXT: vmv.v.x v24, a0
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
| ; RV64ZVBC64-NEXT: vzext.vf8 v16, v8
| ; RV64ZVBC64-NEXT: vzext.vf8 v8, v24
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v16, v8
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
| ; RV64ZVBC64-NEXT: vnsrl.wi v16, v8, 0
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
| ; RV64ZVBC64-NEXT: vnsrl.wi v10, v16, 0
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0
| ; RV64ZVBC64-NEXT: ret
| ;
| ; RV32ZVBC32-LABEL: clmul_nxv8i8_vx:
| ; RV32ZVBC32: # %bb.0:
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, m1, ta, ma
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0
| ; RV32ZVBC32-NEXT: ret
| ;
| ; RV64ZVBC32-LABEL: clmul_nxv8i8_vx:
| ; RV64ZVBC32: # %bb.0:
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, m1, ta, ma
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0
| ; RV64ZVBC32-NEXT: ret
| ; Splat %b across all lanes (insertelement at index 0, then an all-zero shuffle mask).
| %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
| %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
| %v = call <vscale x 8 x i8> @llvm.clmul.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb)
| ret <vscale x 8 x i8> %v
| }
| |
| ; clmul of two <vscale x 16 x i8> vectors %va and %vb.
| ; Expected codegen, per the check lines below -
| ;   RV32V, RV64V, RV32ZVBC64, RV64ZVBC64 - the same vand / vmul.vv / vxor.vv
| ;     expansion over the single-bit masks 1 through 128 at e8/m2; no vclmul
| ;     is expected for the +zvbc run lines at this type.
| ;     NOTE(review) - presumably because widening i8 to e64 here would need
| ;     more than LMUL 8; confirm against the lowering code.
| ;   RV32ZVBC32, RV64ZVBC32 - a single vclmul.vv at e8/m2.
| define <vscale x 16 x i8> @clmul_nxv16i8_vv(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb) nounwind {
| ; RV32V-LABEL: clmul_nxv16i8_vv:
| ; RV32V: # %bb.0:
| ; RV32V-NEXT: vsetvli a0, zero, e8, m2, ta, ma
| ; RV32V-NEXT: vand.vi v12, v10, 2
| ; RV32V-NEXT: vand.vi v14, v10, 1
| ; RV32V-NEXT: vmul.vv v12, v8, v12
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v14, v12
| ; RV32V-NEXT: vand.vi v14, v10, 4
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vi v14, v10, 8
| ; RV32V-NEXT: li a0, 16
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vx v14, v10, a0
| ; RV32V-NEXT: li a0, 32
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vx v14, v10, a0
| ; RV32V-NEXT: li a0, 64
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vx v14, v10, a0
| ; RV32V-NEXT: li a0, 128
| ; RV32V-NEXT: vand.vx v10, v10, a0
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vmul.vv v8, v8, v10
| ; RV32V-NEXT: vxor.vv v8, v12, v8
| ; RV32V-NEXT: ret
| ;
| ; RV64V-LABEL: clmul_nxv16i8_vv:
| ; RV64V: # %bb.0:
| ; RV64V-NEXT: vsetvli a0, zero, e8, m2, ta, ma
| ; RV64V-NEXT: vand.vi v12, v10, 2
| ; RV64V-NEXT: vand.vi v14, v10, 1
| ; RV64V-NEXT: vmul.vv v12, v8, v12
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v14, v12
| ; RV64V-NEXT: vand.vi v14, v10, 4
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vi v14, v10, 8
| ; RV64V-NEXT: li a0, 16
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vx v14, v10, a0
| ; RV64V-NEXT: li a0, 32
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vx v14, v10, a0
| ; RV64V-NEXT: li a0, 64
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vx v14, v10, a0
| ; RV64V-NEXT: li a0, 128
| ; RV64V-NEXT: vand.vx v10, v10, a0
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vmul.vv v8, v8, v10
| ; RV64V-NEXT: vxor.vv v8, v12, v8
| ; RV64V-NEXT: ret
| ;
| ; RV32ZVBC64-LABEL: clmul_nxv16i8_vv:
| ; RV32ZVBC64: # %bb.0:
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
| ; RV32ZVBC64-NEXT: vand.vi v12, v10, 2
| ; RV32ZVBC64-NEXT: vand.vi v14, v10, 1
| ; RV32ZVBC64-NEXT: vmul.vv v12, v8, v12
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v14, v12
| ; RV32ZVBC64-NEXT: vand.vi v14, v10, 4
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vi v14, v10, 8
| ; RV32ZVBC64-NEXT: li a0, 16
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV32ZVBC64-NEXT: li a0, 32
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV32ZVBC64-NEXT: li a0, 64
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV32ZVBC64-NEXT: li a0, 128
| ; RV32ZVBC64-NEXT: vand.vx v10, v10, a0
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v10
| ; RV32ZVBC64-NEXT: vxor.vv v8, v12, v8
| ; RV32ZVBC64-NEXT: ret
| ;
| ; RV64ZVBC64-LABEL: clmul_nxv16i8_vv:
| ; RV64ZVBC64: # %bb.0:
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
| ; RV64ZVBC64-NEXT: vand.vi v12, v10, 2
| ; RV64ZVBC64-NEXT: vand.vi v14, v10, 1
| ; RV64ZVBC64-NEXT: vmul.vv v12, v8, v12
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v14, v12
| ; RV64ZVBC64-NEXT: vand.vi v14, v10, 4
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vi v14, v10, 8
| ; RV64ZVBC64-NEXT: li a0, 16
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV64ZVBC64-NEXT: li a0, 32
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV64ZVBC64-NEXT: li a0, 64
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV64ZVBC64-NEXT: li a0, 128
| ; RV64ZVBC64-NEXT: vand.vx v10, v10, a0
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v10
| ; RV64ZVBC64-NEXT: vxor.vv v8, v12, v8
| ; RV64ZVBC64-NEXT: ret
| ;
| ; RV32ZVBC32-LABEL: clmul_nxv16i8_vv:
| ; RV32ZVBC32: # %bb.0:
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, m2, ta, ma
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v10
| ; RV32ZVBC32-NEXT: ret
| ;
| ; RV64ZVBC32-LABEL: clmul_nxv16i8_vv:
| ; RV64ZVBC32: # %bb.0:
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, m2, ta, ma
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v10
| ; RV64ZVBC32-NEXT: ret
| %v = call <vscale x 16 x i8> @llvm.clmul.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb)
| ret <vscale x 16 x i8> %v
| }
| |
| ; clmul of <vscale x 16 x i8> %va by the scalar %b broadcast to every lane.
| ; Expected codegen, per the check lines below -
| ;   RV32V, RV64V, RV32ZVBC64, RV64ZVBC64 - the scalar is splatted with
| ;     vmv.v.x, then the same vand / vmul.vv / vxor.vv expansion over the
| ;     single-bit masks 1 through 128 is used at e8/m2; no vclmul is expected
| ;     for the +zvbc run lines at this type.
| ;     NOTE(review) - presumably because widening i8 to e64 here would need
| ;     more than LMUL 8; confirm against the lowering code.
| ;   RV32ZVBC32, RV64ZVBC32 - a single vclmul.vx at e8/m2 taking the scalar
| ;     directly from a0.
| define <vscale x 16 x i8> @clmul_nxv16i8_vx(<vscale x 16 x i8> %va, i8 %b) nounwind {
| ; RV32V-LABEL: clmul_nxv16i8_vx:
| ; RV32V: # %bb.0:
| ; RV32V-NEXT: vsetvli a1, zero, e8, m2, ta, ma
| ; RV32V-NEXT: vmv.v.x v10, a0
| ; RV32V-NEXT: li a0, 16
| ; RV32V-NEXT: vand.vi v12, v10, 2
| ; RV32V-NEXT: vand.vi v14, v10, 1
| ; RV32V-NEXT: vmul.vv v12, v8, v12
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v14, v12
| ; RV32V-NEXT: vand.vi v14, v10, 4
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vi v14, v10, 8
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vx v14, v10, a0
| ; RV32V-NEXT: li a0, 32
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vx v14, v10, a0
| ; RV32V-NEXT: li a0, 64
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vand.vx v14, v10, a0
| ; RV32V-NEXT: li a0, 128
| ; RV32V-NEXT: vand.vx v10, v10, a0
| ; RV32V-NEXT: vmul.vv v14, v8, v14
| ; RV32V-NEXT: vxor.vv v12, v12, v14
| ; RV32V-NEXT: vmul.vv v8, v8, v10
| ; RV32V-NEXT: vxor.vv v8, v12, v8
| ; RV32V-NEXT: ret
| ;
| ; RV64V-LABEL: clmul_nxv16i8_vx:
| ; RV64V: # %bb.0:
| ; RV64V-NEXT: vsetvli a1, zero, e8, m2, ta, ma
| ; RV64V-NEXT: vmv.v.x v10, a0
| ; RV64V-NEXT: li a0, 16
| ; RV64V-NEXT: vand.vi v12, v10, 2
| ; RV64V-NEXT: vand.vi v14, v10, 1
| ; RV64V-NEXT: vmul.vv v12, v8, v12
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v14, v12
| ; RV64V-NEXT: vand.vi v14, v10, 4
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vi v14, v10, 8
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vx v14, v10, a0
| ; RV64V-NEXT: li a0, 32
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vx v14, v10, a0
| ; RV64V-NEXT: li a0, 64
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vand.vx v14, v10, a0
| ; RV64V-NEXT: li a0, 128
| ; RV64V-NEXT: vand.vx v10, v10, a0
| ; RV64V-NEXT: vmul.vv v14, v8, v14
| ; RV64V-NEXT: vxor.vv v12, v12, v14
| ; RV64V-NEXT: vmul.vv v8, v8, v10
| ; RV64V-NEXT: vxor.vv v8, v12, v8
| ; RV64V-NEXT: ret
| ;
| ; RV32ZVBC64-LABEL: clmul_nxv16i8_vx:
| ; RV32ZVBC64: # %bb.0:
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
| ; RV32ZVBC64-NEXT: vmv.v.x v10, a0
| ; RV32ZVBC64-NEXT: li a0, 16
| ; RV32ZVBC64-NEXT: vand.vi v12, v10, 2
| ; RV32ZVBC64-NEXT: vand.vi v14, v10, 1
| ; RV32ZVBC64-NEXT: vmul.vv v12, v8, v12
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v14, v12
| ; RV32ZVBC64-NEXT: vand.vi v14, v10, 4
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vi v14, v10, 8
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV32ZVBC64-NEXT: li a0, 32
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV32ZVBC64-NEXT: li a0, 64
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV32ZVBC64-NEXT: li a0, 128
| ; RV32ZVBC64-NEXT: vand.vx v10, v10, a0
| ; RV32ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV32ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v10
| ; RV32ZVBC64-NEXT: vxor.vv v8, v12, v8
| ; RV32ZVBC64-NEXT: ret
| ;
| ; RV64ZVBC64-LABEL: clmul_nxv16i8_vx:
| ; RV64ZVBC64: # %bb.0:
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
| ; RV64ZVBC64-NEXT: vmv.v.x v10, a0
| ; RV64ZVBC64-NEXT: li a0, 16
| ; RV64ZVBC64-NEXT: vand.vi v12, v10, 2
| ; RV64ZVBC64-NEXT: vand.vi v14, v10, 1
| ; RV64ZVBC64-NEXT: vmul.vv v12, v8, v12
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v14, v12
| ; RV64ZVBC64-NEXT: vand.vi v14, v10, 4
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vi v14, v10, 8
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV64ZVBC64-NEXT: li a0, 32
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV64ZVBC64-NEXT: li a0, 64
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vand.vx v14, v10, a0
| ; RV64ZVBC64-NEXT: li a0, 128
| ; RV64ZVBC64-NEXT: vand.vx v10, v10, a0
| ; RV64ZVBC64-NEXT: vmul.vv v14, v8, v14
| ; RV64ZVBC64-NEXT: vxor.vv v12, v12, v14
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v10
| ; RV64ZVBC64-NEXT: vxor.vv v8, v12, v8
| ; RV64ZVBC64-NEXT: ret
| ;
| ; RV32ZVBC32-LABEL: clmul_nxv16i8_vx:
| ; RV32ZVBC32: # %bb.0:
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, m2, ta, ma
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0
| ; RV32ZVBC32-NEXT: ret
| ;
| ; RV64ZVBC32-LABEL: clmul_nxv16i8_vx:
| ; RV64ZVBC32: # %bb.0:
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, m2, ta, ma
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0
| ; RV64ZVBC32-NEXT: ret
| ; Splat %b across all lanes (insertelement at index 0, then an all-zero shuffle mask).
| %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
| %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
| %v = call <vscale x 16 x i8> @llvm.clmul.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb)
| ret <vscale x 16 x i8> %v
| }
| |
| ; clmul of two <vscale x 32 x i8> vectors %va and %vb.
| ; Expected codegen, per the check lines below -
| ;   RV32V, RV64V, RV32ZVBC64, RV64ZVBC64 - a vand / vmul.vv / vxor.vv
| ;     expansion over the single-bit masks 1 through 128 at e8/m4; no vclmul
| ;     is expected for the +zvbc run lines at this type.
| ;     NOTE(review) - presumably because widening i8 to e64 here would need
| ;     more than LMUL 8; confirm against the lowering code.
| ;   RV32ZVBC32, RV64ZVBC32 - a single vclmul.vv at e8/m4.
| define <vscale x 32 x i8> @clmul_nxv32i8_vv(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb) nounwind {
| ; RV32V-LABEL: clmul_nxv32i8_vv:
| ; RV32V: # %bb.0:
| ; RV32V-NEXT: vsetvli a0, zero, e8, m4, ta, ma
| ; RV32V-NEXT: vand.vi v16, v12, 2
| ; RV32V-NEXT: vand.vi v20, v12, 1
| ; RV32V-NEXT: vmul.vv v16, v8, v16
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v20, v16
| ; RV32V-NEXT: vand.vi v20, v12, 4
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vi v20, v12, 8
| ; RV32V-NEXT: li a0, 16
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vx v20, v12, a0
| ; RV32V-NEXT: li a0, 32
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vx v20, v12, a0
| ; RV32V-NEXT: li a0, 64
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vx v20, v12, a0
| ; RV32V-NEXT: li a0, 128
| ; RV32V-NEXT: vand.vx v12, v12, a0
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vmul.vv v8, v8, v12
| ; RV32V-NEXT: vxor.vv v8, v16, v8
| ; RV32V-NEXT: ret
| ;
| ; RV64V-LABEL: clmul_nxv32i8_vv:
| ; RV64V: # %bb.0:
| ; RV64V-NEXT: vsetvli a0, zero, e8, m4, ta, ma
| ; RV64V-NEXT: vand.vi v16, v12, 2
| ; RV64V-NEXT: vand.vi v20, v12, 1
| ; RV64V-NEXT: vmul.vv v16, v8, v16
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v20, v16
| ; RV64V-NEXT: vand.vi v20, v12, 4
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vi v20, v12, 8
| ; RV64V-NEXT: li a0, 16
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vx v20, v12, a0
| ; RV64V-NEXT: li a0, 32
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vx v20, v12, a0
| ; RV64V-NEXT: li a0, 64
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vx v20, v12, a0
| ; RV64V-NEXT: li a0, 128
| ; RV64V-NEXT: vand.vx v12, v12, a0
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vmul.vv v8, v8, v12
| ; RV64V-NEXT: vxor.vv v8, v16, v8
| ; RV64V-NEXT: ret
| ;
| ; RV32ZVBC64-LABEL: clmul_nxv32i8_vv:
| ; RV32ZVBC64: # %bb.0:
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e8, m4, ta, ma
| ; RV32ZVBC64-NEXT: vand.vi v16, v12, 2
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 1
| ; RV32ZVBC64-NEXT: vmul.vv v16, v8, v16
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v20, v16
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 4
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 8
| ; RV32ZVBC64-NEXT: li a0, 16
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV32ZVBC64-NEXT: li a0, 32
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV32ZVBC64-NEXT: li a0, 64
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV32ZVBC64-NEXT: li a0, 128
| ; RV32ZVBC64-NEXT: vand.vx v12, v12, a0
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v12
| ; RV32ZVBC64-NEXT: vxor.vv v8, v16, v8
| ; RV32ZVBC64-NEXT: ret
| ;
| ; RV64ZVBC64-LABEL: clmul_nxv32i8_vv:
| ; RV64ZVBC64: # %bb.0:
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e8, m4, ta, ma
| ; RV64ZVBC64-NEXT: vand.vi v16, v12, 2
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 1
| ; RV64ZVBC64-NEXT: vmul.vv v16, v8, v16
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v20, v16
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 4
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 8
| ; RV64ZVBC64-NEXT: li a0, 16
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV64ZVBC64-NEXT: li a0, 32
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV64ZVBC64-NEXT: li a0, 64
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV64ZVBC64-NEXT: li a0, 128
| ; RV64ZVBC64-NEXT: vand.vx v12, v12, a0
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v12
| ; RV64ZVBC64-NEXT: vxor.vv v8, v16, v8
| ; RV64ZVBC64-NEXT: ret
| ;
| ; RV32ZVBC32-LABEL: clmul_nxv32i8_vv:
| ; RV32ZVBC32: # %bb.0:
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, m4, ta, ma
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v12
| ; RV32ZVBC32-NEXT: ret
| ;
| ; RV64ZVBC32-LABEL: clmul_nxv32i8_vv:
| ; RV64ZVBC32: # %bb.0:
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, m4, ta, ma
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v12
| ; RV64ZVBC32-NEXT: ret
| %v = call <vscale x 32 x i8> @llvm.clmul.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb)
| ret <vscale x 32 x i8> %v
| }
| |
| ; clmul of <vscale x 32 x i8> %va by the scalar %b broadcast to every lane.
| ; Expected codegen, per the check lines below -
| ;   RV32V, RV64V, RV32ZVBC64, RV64ZVBC64 - the scalar is splatted with
| ;     vmv.v.x, then a vand / vmul.vv / vxor.vv expansion over the single-bit
| ;     masks 1 through 128 is used at e8/m4; no vclmul is expected for the
| ;     +zvbc run lines at this type.
| ;     NOTE(review) - presumably because widening i8 to e64 here would need
| ;     more than LMUL 8; confirm against the lowering code.
| ;   RV32ZVBC32, RV64ZVBC32 - a single vclmul.vx at e8/m4 taking the scalar
| ;     directly from a0.
| define <vscale x 32 x i8> @clmul_nxv32i8_vx(<vscale x 32 x i8> %va, i8 %b) nounwind {
| ; RV32V-LABEL: clmul_nxv32i8_vx:
| ; RV32V: # %bb.0:
| ; RV32V-NEXT: vsetvli a1, zero, e8, m4, ta, ma
| ; RV32V-NEXT: vmv.v.x v12, a0
| ; RV32V-NEXT: li a0, 16
| ; RV32V-NEXT: vand.vi v16, v12, 2
| ; RV32V-NEXT: vand.vi v20, v12, 1
| ; RV32V-NEXT: vmul.vv v16, v8, v16
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v20, v16
| ; RV32V-NEXT: vand.vi v20, v12, 4
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vi v20, v12, 8
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vx v20, v12, a0
| ; RV32V-NEXT: li a0, 32
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vx v20, v12, a0
| ; RV32V-NEXT: li a0, 64
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vand.vx v20, v12, a0
| ; RV32V-NEXT: li a0, 128
| ; RV32V-NEXT: vand.vx v12, v12, a0
| ; RV32V-NEXT: vmul.vv v20, v8, v20
| ; RV32V-NEXT: vxor.vv v16, v16, v20
| ; RV32V-NEXT: vmul.vv v8, v8, v12
| ; RV32V-NEXT: vxor.vv v8, v16, v8
| ; RV32V-NEXT: ret
| ;
| ; RV64V-LABEL: clmul_nxv32i8_vx:
| ; RV64V: # %bb.0:
| ; RV64V-NEXT: vsetvli a1, zero, e8, m4, ta, ma
| ; RV64V-NEXT: vmv.v.x v12, a0
| ; RV64V-NEXT: li a0, 16
| ; RV64V-NEXT: vand.vi v16, v12, 2
| ; RV64V-NEXT: vand.vi v20, v12, 1
| ; RV64V-NEXT: vmul.vv v16, v8, v16
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v20, v16
| ; RV64V-NEXT: vand.vi v20, v12, 4
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vi v20, v12, 8
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vx v20, v12, a0
| ; RV64V-NEXT: li a0, 32
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vx v20, v12, a0
| ; RV64V-NEXT: li a0, 64
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vand.vx v20, v12, a0
| ; RV64V-NEXT: li a0, 128
| ; RV64V-NEXT: vand.vx v12, v12, a0
| ; RV64V-NEXT: vmul.vv v20, v8, v20
| ; RV64V-NEXT: vxor.vv v16, v16, v20
| ; RV64V-NEXT: vmul.vv v8, v8, v12
| ; RV64V-NEXT: vxor.vv v8, v16, v8
| ; RV64V-NEXT: ret
| ;
| ; RV32ZVBC64-LABEL: clmul_nxv32i8_vx:
| ; RV32ZVBC64: # %bb.0:
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, m4, ta, ma
| ; RV32ZVBC64-NEXT: vmv.v.x v12, a0
| ; RV32ZVBC64-NEXT: li a0, 16
| ; RV32ZVBC64-NEXT: vand.vi v16, v12, 2
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 1
| ; RV32ZVBC64-NEXT: vmul.vv v16, v8, v16
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v20, v16
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 4
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 8
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV32ZVBC64-NEXT: li a0, 32
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV32ZVBC64-NEXT: li a0, 64
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV32ZVBC64-NEXT: li a0, 128
| ; RV32ZVBC64-NEXT: vand.vx v12, v12, a0
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v12
| ; RV32ZVBC64-NEXT: vxor.vv v8, v16, v8
| ; RV32ZVBC64-NEXT: ret
| ;
| ; RV64ZVBC64-LABEL: clmul_nxv32i8_vx:
| ; RV64ZVBC64: # %bb.0:
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, m4, ta, ma
| ; RV64ZVBC64-NEXT: vmv.v.x v12, a0
| ; RV64ZVBC64-NEXT: li a0, 16
| ; RV64ZVBC64-NEXT: vand.vi v16, v12, 2
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 1
| ; RV64ZVBC64-NEXT: vmul.vv v16, v8, v16
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v20, v16
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 4
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 8
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV64ZVBC64-NEXT: li a0, 32
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV64ZVBC64-NEXT: li a0, 64
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0
| ; RV64ZVBC64-NEXT: li a0, 128
| ; RV64ZVBC64-NEXT: vand.vx v12, v12, a0
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v12
| ; RV64ZVBC64-NEXT: vxor.vv v8, v16, v8
| ; RV64ZVBC64-NEXT: ret
| ;
| ; RV32ZVBC32-LABEL: clmul_nxv32i8_vx:
| ; RV32ZVBC32: # %bb.0:
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, m4, ta, ma
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0
| ; RV32ZVBC32-NEXT: ret
| ;
| ; RV64ZVBC32-LABEL: clmul_nxv32i8_vx:
| ; RV64ZVBC32: # %bb.0:
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, m4, ta, ma
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0
| ; RV64ZVBC32-NEXT: ret
| ; Splat %b across all lanes (insertelement at index 0, then an all-zero shuffle mask).
| %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
| %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
| %v = call <vscale x 32 x i8> @llvm.clmul.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb)
| ret <vscale x 32 x i8> %v
| }
| |
| ; clmul of two <vscale x 64 x i8> vectors %va and %vb.
| ; Expected codegen, per the check lines below -
| ;   RV32V, RV64V, RV32ZVBC64, RV64ZVBC64 - a vand / vmul.vv / vxor.vv
| ;     expansion over the single-bit masks 1 through 128 at e8/m8, using the
| ;     v0 register group for the per-bit temporaries; no vclmul is expected
| ;     for the +zvbc run lines at this type.
| ;     NOTE(review) - presumably because i8 at LMUL 8 leaves no room to widen
| ;     to e64; confirm against the lowering code.
| ;   RV32ZVBC32, RV64ZVBC32 - a single vclmul.vv at e8/m8.
| define <vscale x 64 x i8> @clmul_nxv64i8_vv(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb) nounwind {
| ; RV32V-LABEL: clmul_nxv64i8_vv:
| ; RV32V: # %bb.0:
| ; RV32V-NEXT: vsetvli a0, zero, e8, m8, ta, ma
| ; RV32V-NEXT: vand.vi v24, v16, 2
| ; RV32V-NEXT: vand.vi v0, v16, 1
| ; RV32V-NEXT: vmul.vv v24, v8, v24
| ; RV32V-NEXT: vmul.vv v0, v8, v0
| ; RV32V-NEXT: vxor.vv v24, v0, v24
| ; RV32V-NEXT: vand.vi v0, v16, 4
| ; RV32V-NEXT: vmul.vv v0, v8, v0
| ; RV32V-NEXT: vxor.vv v24, v24, v0
| ; RV32V-NEXT: vand.vi v0, v16, 8
| ; RV32V-NEXT: li a0, 16
| ; RV32V-NEXT: vmul.vv v0, v8, v0
| ; RV32V-NEXT: vxor.vv v24, v24, v0
| ; RV32V-NEXT: vand.vx v0, v16, a0
| ; RV32V-NEXT: li a0, 32
| ; RV32V-NEXT: vmul.vv v0, v8, v0
| ; RV32V-NEXT: vxor.vv v24, v24, v0
| ; RV32V-NEXT: vand.vx v0, v16, a0
| ; RV32V-NEXT: li a0, 64
| ; RV32V-NEXT: vmul.vv v0, v8, v0
| ; RV32V-NEXT: vxor.vv v24, v24, v0
| ; RV32V-NEXT: vand.vx v0, v16, a0
| ; RV32V-NEXT: li a0, 128
| ; RV32V-NEXT: vand.vx v16, v16, a0
| ; RV32V-NEXT: vmul.vv v0, v8, v0
| ; RV32V-NEXT: vxor.vv v24, v24, v0
| ; RV32V-NEXT: vmul.vv v8, v8, v16
| ; RV32V-NEXT: vxor.vv v8, v24, v8
| ; RV32V-NEXT: ret
| ;
| ; RV64V-LABEL: clmul_nxv64i8_vv:
| ; RV64V: # %bb.0:
| ; RV64V-NEXT: vsetvli a0, zero, e8, m8, ta, ma
| ; RV64V-NEXT: vand.vi v24, v16, 2
| ; RV64V-NEXT: vand.vi v0, v16, 1
| ; RV64V-NEXT: vmul.vv v24, v8, v24
| ; RV64V-NEXT: vmul.vv v0, v8, v0
| ; RV64V-NEXT: vxor.vv v24, v0, v24
| ; RV64V-NEXT: vand.vi v0, v16, 4
| ; RV64V-NEXT: vmul.vv v0, v8, v0
| ; RV64V-NEXT: vxor.vv v24, v24, v0
| ; RV64V-NEXT: vand.vi v0, v16, 8
| ; RV64V-NEXT: li a0, 16
| ; RV64V-NEXT: vmul.vv v0, v8, v0
| ; RV64V-NEXT: vxor.vv v24, v24, v0
| ; RV64V-NEXT: vand.vx v0, v16, a0
| ; RV64V-NEXT: li a0, 32
| ; RV64V-NEXT: vmul.vv v0, v8, v0
| ; RV64V-NEXT: vxor.vv v24, v24, v0
| ; RV64V-NEXT: vand.vx v0, v16, a0
| ; RV64V-NEXT: li a0, 64
| ; RV64V-NEXT: vmul.vv v0, v8, v0
| ; RV64V-NEXT: vxor.vv v24, v24, v0
| ; RV64V-NEXT: vand.vx v0, v16, a0
| ; RV64V-NEXT: li a0, 128
| ; RV64V-NEXT: vand.vx v16, v16, a0
| ; RV64V-NEXT: vmul.vv v0, v8, v0
| ; RV64V-NEXT: vxor.vv v24, v24, v0
| ; RV64V-NEXT: vmul.vv v8, v8, v16
| ; RV64V-NEXT: vxor.vv v8, v24, v8
| ; RV64V-NEXT: ret
| ;
| ; RV32ZVBC64-LABEL: clmul_nxv64i8_vv:
| ; RV32ZVBC64: # %bb.0:
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
| ; RV32ZVBC64-NEXT: vand.vi v24, v16, 2
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 1
| ; RV32ZVBC64-NEXT: vmul.vv v24, v8, v24
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV32ZVBC64-NEXT: vxor.vv v24, v0, v24
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 4
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 8
| ; RV32ZVBC64-NEXT: li a0, 16
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0
| ; RV32ZVBC64-NEXT: li a0, 32
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0
| ; RV32ZVBC64-NEXT: li a0, 64
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0
| ; RV32ZVBC64-NEXT: li a0, 128
| ; RV32ZVBC64-NEXT: vand.vx v16, v16, a0
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v16
| ; RV32ZVBC64-NEXT: vxor.vv v8, v24, v8
| ; RV32ZVBC64-NEXT: ret
| ;
| ; RV64ZVBC64-LABEL: clmul_nxv64i8_vv:
| ; RV64ZVBC64: # %bb.0:
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e8, m8, ta, ma
| ; RV64ZVBC64-NEXT: vand.vi v24, v16, 2
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 1
| ; RV64ZVBC64-NEXT: vmul.vv v24, v8, v24
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV64ZVBC64-NEXT: vxor.vv v24, v0, v24
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 4
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 8
| ; RV64ZVBC64-NEXT: li a0, 16
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0
| ; RV64ZVBC64-NEXT: li a0, 32
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0
| ; RV64ZVBC64-NEXT: li a0, 64
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0
| ; RV64ZVBC64-NEXT: li a0, 128
| ; RV64ZVBC64-NEXT: vand.vx v16, v16, a0
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v16
| ; RV64ZVBC64-NEXT: vxor.vv v8, v24, v8
| ; RV64ZVBC64-NEXT: ret
| ;
| ; RV32ZVBC32-LABEL: clmul_nxv64i8_vv:
| ; RV32ZVBC32: # %bb.0:
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v16
| ; RV32ZVBC32-NEXT: ret
| ;
| ; RV64ZVBC32-LABEL: clmul_nxv64i8_vv:
| ; RV64ZVBC32: # %bb.0:
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e8, m8, ta, ma
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v16
| ; RV64ZVBC32-NEXT: ret
| %v = call <vscale x 64 x i8> @llvm.clmul.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb)
| ret <vscale x 64 x i8> %v
| }
| |
| ; Carry-less multiply of <vscale x 64 x i8> %va by a scalar i8 %b that is |
| ; splatted to every lane (insertelement into lane 0 + zero-mask shufflevector). |
| ; The assertions below are autogenerated; regenerate them instead of editing. |
| ; Expected codegen per RUN configuration, as recorded by the assertions - |
| ;  * +v only (RV32V/RV64V prefixes) and +v,+zvbc (RV32ZVBC64/RV64ZVBC64 |
| ;    prefixes) - vmv.v.x splat at e8/m8, then one vand/vmul pair for each of |
| ;    the 8 single-bit masks 1, 2, 4, ..., 128, folded together with vxor. |
| ;  * +v,+experimental-zvbc32e (RV32ZVBC32/RV64ZVBC32 prefixes) - a single |
| ;    vclmul.vx that takes the scalar operand directly from a0. |
| define <vscale x 64 x i8> @clmul_nxv64i8_vx(<vscale x 64 x i8> %va, i8 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv64i8_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e8, m8, ta, ma |
| ; RV32V-NEXT: vmv.v.x v16, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v24, v16, 2 |
| ; RV32V-NEXT: vand.vi v0, v16, 1 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v0, v24 |
| ; RV32V-NEXT: vand.vi v0, v16, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vi v0, v16, 8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v16, v16, a0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vmul.vv v8, v8, v16 |
| ; RV32V-NEXT: vxor.vv v8, v24, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv64i8_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e8, m8, ta, ma |
| ; RV64V-NEXT: vmv.v.x v16, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v24, v16, 2 |
| ; RV64V-NEXT: vand.vi v0, v16, 1 |
| ; RV64V-NEXT: vmul.vv v24, v8, v24 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v0, v24 |
| ; RV64V-NEXT: vand.vi v0, v16, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vi v0, v16, 8 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vand.vx v16, v16, a0 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vmul.vv v8, v8, v16 |
| ; RV64V-NEXT: vxor.vv v8, v24, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv64i8_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e8, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 16 |
| ; RV32ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 128 |
| ; RV32ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv64i8_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e8, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv64i8_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e8, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv64i8_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e8, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 |
| %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer |
| %v = call <vscale x 64 x i8> @llvm.clmul.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb) |
| ret <vscale x 64 x i8> %v |
| } |
| |
| ; Carry-less multiply of two <vscale x 1 x i16> vectors (vector-vector form). |
| ; The assertions below are autogenerated; regenerate them instead of editing. |
| ; Expected codegen per RUN configuration, as recorded by the assertions - |
| ;  * +v only (RV32V/RV64V prefixes) - at e16/mf4, one vand/vmul pair for each |
| ;    of the 16 single-bit masks 1, 2, 4, ..., 0x8000 (the last one is built |
| ;    with "lui a0, 8"), folded together with vxor. |
| ;  * +v,+zvbc (RV32ZVBC64/RV64ZVBC64 prefixes) - both operands are widened |
| ;    with vzext.vf4 to e64/m1, multiplied by one vclmul.vv, and the result is |
| ;    narrowed back with two vnsrl.wi steps (e32/mf2, then e16/mf4). |
| ;  * +v,+experimental-zvbc32e (RV32ZVBC32/RV64ZVBC32 prefixes) - a single |
| ;    vclmul.vv directly at e16/mf4. |
| define <vscale x 1 x i16> @clmul_nxv1i16_vv(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i16_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e16, mf4, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i16_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e16, mf4, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i16_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v10, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v9, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v9, v10 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i16_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v10, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v9, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v9, v10 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i16_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i16_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 1 x i16> @llvm.clmul.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) |
| ret <vscale x 1 x i16> %v |
| } |
| |
| ; Carry-less multiply of <vscale x 1 x i16> %va by a scalar i16 %b that is |
| ; splatted to every lane (insertelement into lane 0 + zero-mask shufflevector). |
| ; The assertions below are autogenerated; regenerate them instead of editing. |
| ; Expected codegen per RUN configuration, as recorded by the assertions - |
| ;  * +v only (RV32V/RV64V prefixes) - vmv.v.x splat at e16/mf4, then one |
| ;    vand/vmul pair for each of the 16 single-bit masks 1, 2, 4, ..., 0x8000, |
| ;    folded together with vxor. |
| ;  * +v,+zvbc (RV32ZVBC64/RV64ZVBC64 prefixes) - vmv.v.x splat at e16/mf4, |
| ;    both operands widened with vzext.vf4 to e64/m1, one vclmul.vv, then two |
| ;    vnsrl.wi steps (e32/mf2, then e16/mf4) to narrow the result. |
| ;  * +v,+experimental-zvbc32e (RV32ZVBC32/RV64ZVBC32 prefixes) - a single |
| ;    vclmul.vx that takes the scalar operand directly from a0. |
| define <vscale x 1 x i16> @clmul_nxv1i16_vx(<vscale x 1 x i16> %va, i16 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i16_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e16, mf4, ta, ma |
| ; RV32V-NEXT: vmv.v.x v9, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i16_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e16, mf4, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i16_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v9, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v10, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v8, v9 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i16_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v9, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v10, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v8, v9 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i16_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i16_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 |
| %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer |
| %v = call <vscale x 1 x i16> @llvm.clmul.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) |
| ret <vscale x 1 x i16> %v |
| } |
| |
| ; Carry-less multiply of two <vscale x 2 x i16> vectors (vector-vector form). |
| ; The assertions below are autogenerated; regenerate them instead of editing. |
| ; Expected codegen per RUN configuration, as recorded by the assertions - |
| ;  * +v only (RV32V/RV64V prefixes) - at e16/mf2, one vand/vmul pair for each |
| ;    of the 16 single-bit masks 1, 2, 4, ..., 0x8000 (the last one is built |
| ;    with "lui a0, 8"), folded together with vxor. |
| ;  * +v,+zvbc (RV32ZVBC64/RV64ZVBC64 prefixes) - both operands are widened |
| ;    with vzext.vf4 to e64/m2, multiplied by one vclmul.vv, and the result is |
| ;    narrowed back with two vnsrl.wi steps (e32/m1, then e16/mf2). |
| ;  * +v,+experimental-zvbc32e (RV32ZVBC32/RV64ZVBC32 prefixes) - a single |
| ;    vclmul.vv directly at e16/mf2. |
| define <vscale x 2 x i16> @clmul_nxv2i16_vv(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i16_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i16_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i16_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v10, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v12, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v12, v10 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i16_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v10, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v12, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v12, v10 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i16_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i16_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 2 x i16> @llvm.clmul.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) |
| ret <vscale x 2 x i16> %v |
| } |
| |
| ; Carry-less multiply of <vscale x 2 x i16> %va by a scalar i16 %b that is |
| ; splatted to every lane (insertelement into lane 0 + zero-mask shufflevector). |
| ; The assertions below are autogenerated; regenerate them instead of editing. |
| ; Expected codegen per RUN configuration, as recorded by the assertions - |
| ;  * +v only (RV32V/RV64V prefixes) - vmv.v.x splat at e16/mf2, then one |
| ;    vand/vmul pair for each of the 16 single-bit masks 1, 2, 4, ..., 0x8000, |
| ;    folded together with vxor. |
| ;  * +v,+zvbc (RV32ZVBC64/RV64ZVBC64 prefixes) - vmv.v.x splat at e16/mf2, |
| ;    both operands widened with vzext.vf4 to e64/m2, one vclmul.vv, then two |
| ;    vnsrl.wi steps (e32/m1, then e16/mf2) to narrow the result. |
| ;  * +v,+experimental-zvbc32e (RV32ZVBC32/RV64ZVBC32 prefixes) - a single |
| ;    vclmul.vx that takes the scalar operand directly from a0. |
| define <vscale x 2 x i16> @clmul_nxv2i16_vx(<vscale x 2 x i16> %va, i16 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i16_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e16, mf2, ta, ma |
| ; RV32V-NEXT: vmv.v.x v9, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i16_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e16, mf2, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i16_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v10, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v8, v12 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i16_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v10, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v8, v12 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v10, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i16_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i16_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 |
| %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer |
| %v = call <vscale x 2 x i16> @llvm.clmul.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) |
| ret <vscale x 2 x i16> %v |
| } |
| |
| ; Vector-vector carry-less multiply of two <vscale x 4 x i16> operands through |
| ; @llvm.clmul.nxv4i16. Expected code per RUN configuration: |
| ; - RV32V and RV64V (+v only) - each of the 16 single-bit masks (1 up to |
| ; lui 8) is applied to v9 with vand, the masked value is multiplied by v8 |
| ; with vmul.vv, and the partial products are accumulated with vxor.vv. |
| ; - RV32ZVBC64 and RV64ZVBC64 (+zvbc) - both operands are zero-extended to |
| ; e64/m4 with vzext.vf4, combined by one vclmul.vv, then narrowed back to |
| ; e16 by two vnsrl.wi steps (e32, then e16). |
| ; - RV32ZVBC32 and RV64ZVBC32 (+experimental-zvbc32e) - one vclmul.vv at e16. |
| ; The assertions below are autogenerated (see the NOTE on the first line of |
| ; this file); regenerate them with utils/update_llc_test_checks.py rather than |
| ; editing them by hand. |
| define <vscale x 4 x i16> @clmul_nxv4i16_vv(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i16_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e16, m1, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i16_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e16, m1, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i16_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v12, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v16, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v16, v12 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i16_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v12, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v16, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v16, v12 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i16_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e16, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i16_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e16, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 4 x i16> @llvm.clmul.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) |
| ret <vscale x 4 x i16> %v |
| } |
| |
| ; Vector-scalar carry-less multiply - the i16 scalar %b is splatted to a |
| ; <vscale x 4 x i16> vector (insertelement + shufflevector with a |
| ; zeroinitializer mask) and passed to @llvm.clmul.nxv4i16 together with %va. |
| ; Expected code per RUN configuration: |
| ; - RV32V and RV64V (+v only) - a0 is splatted into v9 with vmv.v.x, then each |
| ; of the 16 single-bit masks is applied to v9 with vand, multiplied by v8 |
| ; with vmul.vv, and accumulated with vxor.vv. |
| ; - RV32ZVBC64 and RV64ZVBC64 (+zvbc) - a0 is splatted at e16 with vmv.v.x, |
| ; both operands are zero-extended to e64/m4 with vzext.vf4, combined by one |
| ; vclmul.vv, then narrowed back to e16 by two vnsrl.wi steps. |
| ; - RV32ZVBC32 and RV64ZVBC32 (+experimental-zvbc32e) - one vclmul.vx at e16 |
| ; that takes a0 directly, with no separate splat instruction. |
| ; The assertions below are autogenerated (see the NOTE on the first line of |
| ; this file); regenerate them with utils/update_llc_test_checks.py rather than |
| ; editing them by hand. |
| define <vscale x 4 x i16> @clmul_nxv4i16_vx(<vscale x 4 x i16> %va, i16 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i16_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e16, m1, ta, ma |
| ; RV32V-NEXT: vmv.v.x v9, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i16_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e16, m1, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i16_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e16, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v12, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v8, v16 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v12, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i16_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e16, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v12, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v8, v16 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v12, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v12, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i16_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e16, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i16_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e16, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 |
| %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer |
| %v = call <vscale x 4 x i16> @llvm.clmul.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) |
| ret <vscale x 4 x i16> %v |
| } |
| |
| ; Vector-vector carry-less multiply of two <vscale x 8 x i16> operands through |
| ; @llvm.clmul.nxv8i16. Expected code per RUN configuration: |
| ; - RV32V and RV64V (+v only) - at e16/m2, each of the 16 single-bit masks |
| ; (1 up to lui 8) is applied to v10 with vand, the masked value is multiplied |
| ; by v8 with vmul.vv, and the partial products are accumulated with vxor.vv. |
| ; - RV32ZVBC64 and RV64ZVBC64 (+zvbc) - both operands are zero-extended to |
| ; e64/m8 with vzext.vf4, combined by one vclmul.vv, then narrowed back to |
| ; e16 by two vnsrl.wi steps (e32/m4, then e16/m2). |
| ; - RV32ZVBC32 and RV64ZVBC32 (+experimental-zvbc32e) - one vclmul.vv at e16. |
| ; The assertions below are autogenerated (see the NOTE on the first line of |
| ; this file); regenerate them with utils/update_llc_test_checks.py rather than |
| ; editing them by hand. |
| define <vscale x 8 x i16> @clmul_nxv8i16_vv(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i16_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e16, m2, ta, ma |
| ; RV32V-NEXT: vand.vi v12, v10, 2 |
| ; RV32V-NEXT: vand.vi v14, v10, 1 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v14, v12 |
| ; RV32V-NEXT: vand.vi v14, v10, 4 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vi v14, v10, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v10, v10, a0 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vmul.vv v8, v8, v10 |
| ; RV32V-NEXT: vxor.vv v8, v12, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv8i16_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e16, m2, ta, ma |
| ; RV64V-NEXT: vand.vi v12, v10, 2 |
| ; RV64V-NEXT: vand.vi v14, v10, 1 |
| ; RV64V-NEXT: vmul.vv v12, v8, v12 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v14, v12 |
| ; RV64V-NEXT: vand.vi v14, v10, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vi v14, v10, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v10, v10, a0 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vmul.vv v8, v8, v10 |
| ; RV64V-NEXT: vxor.vv v8, v12, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv8i16_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v16, v10 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v24, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v24, v16 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v16, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv8i16_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v16, v10 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v24, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v24, v16 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v16, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i16_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e16, m2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v10 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i16_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e16, m2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v10 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 8 x i16> @llvm.clmul.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) |
| ret <vscale x 8 x i16> %v |
| } |
| |
| ; Vector-scalar carry-less multiply - the i16 scalar %b is splatted to a |
| ; <vscale x 8 x i16> vector (insertelement + shufflevector with a |
| ; zeroinitializer mask) and passed to @llvm.clmul.nxv8i16 together with %va. |
| ; Expected code per RUN configuration: |
| ; - RV32V and RV64V (+v only) - a0 is splatted into v10 with vmv.v.x, then |
| ; each of the 16 single-bit masks is applied to v10 with vand, multiplied by |
| ; v8 with vmul.vv, and accumulated with vxor.vv. |
| ; - RV32ZVBC64 and RV64ZVBC64 (+zvbc) - a0 is splatted at e16 with vmv.v.x, |
| ; both operands are zero-extended to e64/m8 with vzext.vf4, combined by one |
| ; vclmul.vv, then narrowed back to e16 by two vnsrl.wi steps. |
| ; - RV32ZVBC32 and RV64ZVBC32 (+experimental-zvbc32e) - one vclmul.vx at e16 |
| ; that takes a0 directly, with no separate splat instruction. |
| ; The assertions below are autogenerated (see the NOTE on the first line of |
| ; this file); regenerate them with utils/update_llc_test_checks.py rather than |
| ; editing them by hand. |
| define <vscale x 8 x i16> @clmul_nxv8i16_vx(<vscale x 8 x i16> %va, i16 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i16_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e16, m2, ta, ma |
| ; RV32V-NEXT: vmv.v.x v10, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v12, v10, 2 |
| ; RV32V-NEXT: vand.vi v14, v10, 1 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v14, v12 |
| ; RV32V-NEXT: vand.vi v14, v10, 4 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vi v14, v10, 8 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v10, v10, a0 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vmul.vv v8, v8, v10 |
| ; RV32V-NEXT: vxor.vv v8, v12, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv8i16_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e16, m2, ta, ma |
| ; RV64V-NEXT: vmv.v.x v10, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v12, v10, 2 |
| ; RV64V-NEXT: vand.vi v14, v10, 1 |
| ; RV64V-NEXT: vmul.vv v12, v8, v12 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v14, v12 |
| ; RV64V-NEXT: vand.vi v14, v10, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vi v14, v10, 8 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v10, v10, a0 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vmul.vv v8, v8, v10 |
| ; RV64V-NEXT: vxor.vv v8, v12, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv8i16_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e16, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v24, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf4 v16, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf4 v8, v24 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v16, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v16, v8, 0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv8i16_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e16, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v24, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf4 v16, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf4 v8, v24 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v16, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v16, v8, 0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e16, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i16_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e16, m2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i16_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e16, m2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 |
| %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer |
| %v = call <vscale x 8 x i16> @llvm.clmul.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) |
| ret <vscale x 8 x i16> %v |
| } |
| |
| define <vscale x 16 x i16> @clmul_nxv16i16_vv(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv16i16_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e16, m4, ta, ma |
| ; RV32V-NEXT: vand.vi v16, v12, 2 |
| ; RV32V-NEXT: vand.vi v20, v12, 1 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v20, v16 |
| ; RV32V-NEXT: vand.vi v20, v12, 4 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vi v20, v12, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v12, v12, a0 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vmul.vv v8, v8, v12 |
| ; RV32V-NEXT: vxor.vv v8, v16, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv16i16_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e16, m4, ta, ma |
| ; RV64V-NEXT: vand.vi v16, v12, 2 |
| ; RV64V-NEXT: vand.vi v20, v12, 1 |
| ; RV64V-NEXT: vmul.vv v16, v8, v16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v20, v16 |
| ; RV64V-NEXT: vand.vi v20, v12, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vi v20, v12, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v12, v12, a0 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vmul.vv v8, v8, v12 |
| ; RV64V-NEXT: vxor.vv v8, v16, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv16i16_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e16, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vand.vi v16, v12, 2 |
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v20, v16 |
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 8 |
| ; RV32ZVBC64-NEXT: li a0, 16 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 128 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 256 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 512 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1024 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1 |
| ; RV32ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 2 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 8 |
| ; RV32ZVBC64-NEXT: vand.vx v12, v12, a0 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v12 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v16, v8 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv16i16_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e16, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vand.vi v16, v12, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v16, v8, v16 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v20, v16 |
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 8 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1 |
| ; RV64ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8 |
| ; RV64ZVBC64-NEXT: vand.vx v12, v12, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v12 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v16, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv16i16_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e16, m4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v12 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv16i16_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e16, m4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v12 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 16 x i16> @llvm.clmul.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) |
| ret <vscale x 16 x i16> %v |
| } |
| |
| ; Carry-less multiply of <vscale x 16 x i16> %va by a splat of the scalar i16 %b |
| ; (the splat is built with insertelement + shufflevector using a zeroinitializer mask). |
| ; Expected codegen, by check prefix (see the RUN lines at the top of the file) - |
| ; * RV32ZVBC32 / RV64ZVBC32 (+experimental-zvbc32e) - a single vclmul.vx taking %b from a0. |
| ; * RV32V / RV64V (+v) and RV32ZVBC64 / RV64ZVBC64 (+zvbc) - %b is splatted with vmv.v.x, |
| ; then each of the 16 multiplier bits is isolated with vand, multiplied with vmul.vv and |
| ; folded into the result with vxor.vv. |
| define <vscale x 16 x i16> @clmul_nxv16i16_vx(<vscale x 16 x i16> %va, i16 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv16i16_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e16, m4, ta, ma |
| ; RV32V-NEXT: vmv.v.x v12, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v16, v12, 2 |
| ; RV32V-NEXT: vand.vi v20, v12, 1 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v20, v16 |
| ; RV32V-NEXT: vand.vi v20, v12, 4 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vi v20, v12, 8 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v12, v12, a0 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vmul.vv v8, v8, v12 |
| ; RV32V-NEXT: vxor.vv v8, v16, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv16i16_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e16, m4, ta, ma |
| ; RV64V-NEXT: vmv.v.x v12, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v16, v12, 2 |
| ; RV64V-NEXT: vand.vi v20, v12, 1 |
| ; RV64V-NEXT: vmul.vv v16, v8, v16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v20, v16 |
| ; RV64V-NEXT: vand.vi v20, v12, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vi v20, v12, 8 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v12, v12, a0 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vmul.vv v8, v8, v12 |
| ; RV64V-NEXT: vxor.vv v8, v16, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv16i16_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e16, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 16 |
| ; RV32ZVBC64-NEXT: vand.vi v16, v12, 2 |
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v20, v16 |
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vi v20, v12, 8 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 128 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 256 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 512 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1024 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1 |
| ; RV32ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 2 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 8 |
| ; RV32ZVBC64-NEXT: vand.vx v12, v12, a0 |
| ; RV32ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v12 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v16, v8 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv16i16_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e16, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vand.vi v16, v12, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v16, v8, v16 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v20, v16 |
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vi v20, v12, 8 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1 |
| ; RV64ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8 |
| ; RV64ZVBC64-NEXT: vand.vx v12, v12, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC64-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v12 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v16, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv16i16_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e16, m4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv16i16_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e16, m4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 |
| %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer |
| %v = call <vscale x 16 x i16> @llvm.clmul.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) |
| ret <vscale x 16 x i16> %v |
| } |
| |
| ; Carry-less multiply of two <vscale x 32 x i16> vector operands (%va in v8, %vb in v16, |
| ; both at LMUL=8 per the vsetvli in the checks below). |
| ; Expected codegen, by check prefix (see the RUN lines at the top of the file) - |
| ; * RV32ZVBC32 / RV64ZVBC32 (+experimental-zvbc32e) - a single vclmul.vv. |
| ; * RV32V / RV64V (+v) and RV32ZVBC64 / RV64ZVBC64 (+zvbc) - each of the 16 bits of %vb is |
| ; isolated with vand, multiplied into %va with vmul.vv and folded into the result with |
| ; vxor.vv, using v24 as the accumulator and v0 as the temporary. |
| define <vscale x 32 x i16> @clmul_nxv32i16_vv(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv32i16_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e16, m8, ta, ma |
| ; RV32V-NEXT: vand.vi v24, v16, 2 |
| ; RV32V-NEXT: vand.vi v0, v16, 1 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v0, v24 |
| ; RV32V-NEXT: vand.vi v0, v16, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vi v0, v16, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v16, v16, a0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vmul.vv v8, v8, v16 |
| ; RV32V-NEXT: vxor.vv v8, v24, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv32i16_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e16, m8, ta, ma |
| ; RV64V-NEXT: vand.vi v24, v16, 2 |
| ; RV64V-NEXT: vand.vi v0, v16, 1 |
| ; RV64V-NEXT: vmul.vv v24, v8, v24 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v0, v24 |
| ; RV64V-NEXT: vand.vi v0, v16, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vi v0, v16, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v16, v16, a0 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vmul.vv v8, v8, v16 |
| ; RV64V-NEXT: vxor.vv v8, v24, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv32i16_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e16, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV32ZVBC64-NEXT: li a0, 16 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 128 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 256 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 512 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1024 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1 |
| ; RV32ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 2 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 8 |
| ; RV32ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv32i16_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e16, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1 |
| ; RV64ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8 |
| ; RV64ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv32i16_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e16, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v16 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv32i16_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e16, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v16 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 32 x i16> @llvm.clmul.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb) |
| ret <vscale x 32 x i16> %v |
| } |
| |
| define <vscale x 32 x i16> @clmul_nxv32i16_vx(<vscale x 32 x i16> %va, i16 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv32i16_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a1, zero, e16, m8, ta, ma |
| ; RV32V-NEXT: vmv.v.x v16, a0 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vand.vi v24, v16, 2 |
| ; RV32V-NEXT: vand.vi v0, v16, 1 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v0, v24 |
| ; RV32V-NEXT: vand.vi v0, v16, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vi v0, v16, 8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v16, v16, a0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vmul.vv v8, v8, v16 |
| ; RV32V-NEXT: vxor.vv v8, v24, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv32i16_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e16, m8, ta, ma |
| ; RV64V-NEXT: vmv.v.x v16, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v24, v16, 2 |
| ; RV64V-NEXT: vand.vi v0, v16, 1 |
| ; RV64V-NEXT: vmul.vv v24, v8, v24 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v0, v24 |
| ; RV64V-NEXT: vand.vi v0, v16, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vi v0, v16, 8 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vand.vx v16, v16, a0 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vmul.vv v8, v8, v16 |
| ; RV64V-NEXT: vxor.vv v8, v24, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv32i16_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e16, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 16 |
| ; RV32ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 128 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 256 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 512 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1024 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1 |
| ; RV32ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 2 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 8 |
| ; RV32ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv32i16_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e16, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1 |
| ; RV64ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8 |
| ; RV64ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv32i16_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e16, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv32i16_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e16, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 |
| %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer |
| %v = call <vscale x 32 x i16> @llvm.clmul.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb) |
| ret <vscale x 32 x i16> %v |
| } |
| |
| ; clmul_nxv1i32_vv: llvm.clmul on two <vscale x 1 x i32> vector operands. |
| ; Expected lowering per RUN configuration (see the RUN lines at the top of the file): |
| ; * +v only (RV32V and RV64V prefixes): a bit-by-bit expansion. Each single-bit |
| ;   mask of %vb (1, 2, 4, ... up to lui 524288) is ANDed out, multiplied with |
| ;   %va using vmul.vv and XORed into an accumulator. |
| ; * +zvbc (the two prefixes ending in ZVBC64): both operands are zero-extended |
| ;   to e64, multiplied with vclmul.vv and narrowed back with vnsrl.wi. |
| ; * +experimental-zvbc32e (the two prefixes ending in ZVBC32): a single |
| ;   vclmul.vv at e32. |
| define <vscale x 1 x i32> @clmul_nxv1i32_vv(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i32_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e32, mf2, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2048 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4096 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8192 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 32768 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 131072 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i32_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e32, mf2, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i32_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v10, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v9, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v9, v10 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i32_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v10, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v9, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v9, v10 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i32_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i32_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 1 x i32> @llvm.clmul.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) |
| ret <vscale x 1 x i32> %v |
| } |
| |
| ; clmul_nxv1i32_vx: llvm.clmul of a <vscale x 1 x i32> vector with a splat of the |
| ; scalar i32 %b (built with insertelement + shufflevector). |
| ; Expected lowering per RUN configuration (see the RUN lines at the top of the file): |
| ; * RV32V prefix: the single-bit masks of %b are formed in scalar registers |
| ;   (andi/and on a0) and fed to vmul.vx; s0-s7 are spilled to a 32-byte stack |
| ;   frame and reloaded before ret. |
| ; * RV64V prefix: %b is splatted with vmv.v.x, then masked per bit with |
| ;   vand.vi/vand.vx and multiplied with vmul.vv. |
| ; * +zvbc (the two prefixes ending in ZVBC64): splat at e32, zero-extend both |
| ;   operands to e64, vclmul.vv, then narrow with vnsrl.wi. |
| ; * +experimental-zvbc32e (the two prefixes ending in ZVBC32): a single |
| ;   vclmul.vx taking the scalar directly from a0. |
| define <vscale x 1 x i32> @clmul_nxv1i32_vx(<vscale x 1 x i32> %va, i32 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i32_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -32 |
| ; RV32V-NEXT: sw s0, 28(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 24(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 20(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 16(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 12(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 8(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 0(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: andi s6, a0, 2 |
| ; RV32V-NEXT: andi s5, a0, 1 |
| ; RV32V-NEXT: andi s3, a0, 4 |
| ; RV32V-NEXT: andi s1, a0, 8 |
| ; RV32V-NEXT: andi t6, a0, 16 |
| ; RV32V-NEXT: andi t4, a0, 32 |
| ; RV32V-NEXT: andi t2, a0, 64 |
| ; RV32V-NEXT: andi t0, a0, 128 |
| ; RV32V-NEXT: andi a6, a0, 256 |
| ; RV32V-NEXT: andi a4, a0, 512 |
| ; RV32V-NEXT: andi a2, a0, 1024 |
| ; RV32V-NEXT: li a1, 1 |
| ; RV32V-NEXT: lui a3, 1 |
| ; RV32V-NEXT: lui a5, 2 |
| ; RV32V-NEXT: lui a7, 4 |
| ; RV32V-NEXT: lui t1, 8 |
| ; RV32V-NEXT: lui t3, 16 |
| ; RV32V-NEXT: lui t5, 32 |
| ; RV32V-NEXT: lui s0, 64 |
| ; RV32V-NEXT: lui s2, 128 |
| ; RV32V-NEXT: lui s4, 256 |
| ; RV32V-NEXT: vsetvli s7, zero, e32, mf2, ta, ma |
| ; RV32V-NEXT: vmul.vx v9, v8, s6 |
| ; RV32V-NEXT: lui s6, 512 |
| ; RV32V-NEXT: vmul.vx v10, v8, s5 |
| ; RV32V-NEXT: lui s5, 1024 |
| ; RV32V-NEXT: vxor.vv v9, v10, v9 |
| ; RV32V-NEXT: vmul.vx v10, v8, s3 |
| ; RV32V-NEXT: lui s3, 2048 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s1 |
| ; RV32V-NEXT: lui s1, 4096 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t6 |
| ; RV32V-NEXT: lui t6, 8192 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t4 |
| ; RV32V-NEXT: lui t4, 16384 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t2 |
| ; RV32V-NEXT: lui t2, 32768 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t0 |
| ; RV32V-NEXT: lui t0, 65536 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a6 |
| ; RV32V-NEXT: lui a6, 131072 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a4 |
| ; RV32V-NEXT: lui a4, 262144 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a2 |
| ; RV32V-NEXT: lui a2, 524288 |
| ; RV32V-NEXT: slli a1, a1, 11 |
| ; RV32V-NEXT: and a3, a0, a3 |
| ; RV32V-NEXT: and a5, a0, a5 |
| ; RV32V-NEXT: and a7, a0, a7 |
| ; RV32V-NEXT: and t1, a0, t1 |
| ; RV32V-NEXT: and t3, a0, t3 |
| ; RV32V-NEXT: and t5, a0, t5 |
| ; RV32V-NEXT: and s0, a0, s0 |
| ; RV32V-NEXT: and s2, a0, s2 |
| ; RV32V-NEXT: and s4, a0, s4 |
| ; RV32V-NEXT: and s6, a0, s6 |
| ; RV32V-NEXT: and s5, a0, s5 |
| ; RV32V-NEXT: and s3, a0, s3 |
| ; RV32V-NEXT: and s1, a0, s1 |
| ; RV32V-NEXT: and t6, a0, t6 |
| ; RV32V-NEXT: and t4, a0, t4 |
| ; RV32V-NEXT: and t2, a0, t2 |
| ; RV32V-NEXT: and t0, a0, t0 |
| ; RV32V-NEXT: and a6, a0, a6 |
| ; RV32V-NEXT: and a4, a0, a4 |
| ; RV32V-NEXT: and a2, a0, a2 |
| ; RV32V-NEXT: and a0, a0, a1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a7 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s2 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t2 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v8, v8, a2 |
| ; RV32V-NEXT: vxor.vv v8, v9, v8 |
| ; RV32V-NEXT: lw s0, 28(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 24(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 20(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 16(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 12(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 8(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 0(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 32 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i32_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i32_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v9, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v10, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v8, v9 |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i32_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v9, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v10, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v8, v9 |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v10, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v8, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i32_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i32_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 |
| %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer |
| %v = call <vscale x 1 x i32> @llvm.clmul.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) |
| ret <vscale x 1 x i32> %v |
| } |
| |
| define <vscale x 2 x i32> @clmul_nxv2i32_vv(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i32_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e32, m1, ta, ma |
| ; RV32V-NEXT: vand.vi v10, v9, 2 |
| ; RV32V-NEXT: vand.vi v11, v9, 1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v11, v10 |
| ; RV32V-NEXT: vand.vi v11, v9, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 16 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 32 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 128 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 256 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 512 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 1024 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 2048 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 4096 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 8192 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 32768 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 131072 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vand.vx v11, v9, a0 |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v10, v10, v11 |
| ; RV32V-NEXT: vmul.vv v8, v8, v9 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i32_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e32, m1, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i32_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v10, v9 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v12, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v10, v12, v10 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i32_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v10, v9 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v12, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v10, v12, v10 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i32_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e32, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i32_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e32, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 2 x i32> @llvm.clmul.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) |
| ret <vscale x 2 x i32> %v |
| } |
| |
; clmul_nxv2i32_vx: carry-less multiply of %va by the scalar %b, which is
; splatted to every lane (insertelement + zeroinitializer shufflevector) before
; the @llvm.clmul.nxv2i32 call.
;
; Expected lowering per configuration, as recorded in the assertions below:
;  * plain +v on riscv32: %b is split into its 32 single-bit masks with scalar
;    andi/and, each mask is multiplied in with vmul.vx, and the partial
;    products are folded together with vxor.vv. The sequence uses s0-s7, hence
;    the 32-byte spill frame.
;  * plain +v on riscv64: %b is splatted with vmv.v.x and the same bit-by-bit
;    expansion is done on vectors (vand / vmul.vv / vxor.vv).
;  * +zvbc: both operands are zero-extended to e64, multiplied with a single
;    vclmul.vv and narrowed back to e32 with vnsrl.wi.
;  * +experimental-zvbc32e: a single vclmul.vx at e32.
;
; The assertion lines are produced by utils/update_llc_test_checks.py (see the
; file header); regenerate them instead of editing them by hand.
| define <vscale x 2 x i32> @clmul_nxv2i32_vx(<vscale x 2 x i32> %va, i32 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i32_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -32 |
| ; RV32V-NEXT: sw s0, 28(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 24(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 20(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 16(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 12(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 8(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 0(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: andi s6, a0, 2 |
| ; RV32V-NEXT: andi s5, a0, 1 |
| ; RV32V-NEXT: andi s3, a0, 4 |
| ; RV32V-NEXT: andi s1, a0, 8 |
| ; RV32V-NEXT: andi t6, a0, 16 |
| ; RV32V-NEXT: andi t4, a0, 32 |
| ; RV32V-NEXT: andi t2, a0, 64 |
| ; RV32V-NEXT: andi t0, a0, 128 |
| ; RV32V-NEXT: andi a6, a0, 256 |
| ; RV32V-NEXT: andi a4, a0, 512 |
| ; RV32V-NEXT: andi a2, a0, 1024 |
| ; RV32V-NEXT: li a1, 1 |
| ; RV32V-NEXT: lui a3, 1 |
| ; RV32V-NEXT: lui a5, 2 |
| ; RV32V-NEXT: lui a7, 4 |
| ; RV32V-NEXT: lui t1, 8 |
| ; RV32V-NEXT: lui t3, 16 |
| ; RV32V-NEXT: lui t5, 32 |
| ; RV32V-NEXT: lui s0, 64 |
| ; RV32V-NEXT: lui s2, 128 |
| ; RV32V-NEXT: lui s4, 256 |
| ; RV32V-NEXT: vsetvli s7, zero, e32, m1, ta, ma |
| ; RV32V-NEXT: vmul.vx v9, v8, s6 |
| ; RV32V-NEXT: lui s6, 512 |
| ; RV32V-NEXT: vmul.vx v10, v8, s5 |
| ; RV32V-NEXT: lui s5, 1024 |
| ; RV32V-NEXT: vxor.vv v9, v10, v9 |
| ; RV32V-NEXT: vmul.vx v10, v8, s3 |
| ; RV32V-NEXT: lui s3, 2048 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s1 |
| ; RV32V-NEXT: lui s1, 4096 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t6 |
| ; RV32V-NEXT: lui t6, 8192 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t4 |
| ; RV32V-NEXT: lui t4, 16384 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t2 |
| ; RV32V-NEXT: lui t2, 32768 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t0 |
| ; RV32V-NEXT: lui t0, 65536 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a6 |
| ; RV32V-NEXT: lui a6, 131072 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a4 |
| ; RV32V-NEXT: lui a4, 262144 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a2 |
| ; RV32V-NEXT: lui a2, 524288 |
| ; RV32V-NEXT: slli a1, a1, 11 |
| ; RV32V-NEXT: and a3, a0, a3 |
| ; RV32V-NEXT: and a5, a0, a5 |
| ; RV32V-NEXT: and a7, a0, a7 |
| ; RV32V-NEXT: and t1, a0, t1 |
| ; RV32V-NEXT: and t3, a0, t3 |
| ; RV32V-NEXT: and t5, a0, t5 |
| ; RV32V-NEXT: and s0, a0, s0 |
| ; RV32V-NEXT: and s2, a0, s2 |
| ; RV32V-NEXT: and s4, a0, s4 |
| ; RV32V-NEXT: and s6, a0, s6 |
| ; RV32V-NEXT: and s5, a0, s5 |
| ; RV32V-NEXT: and s3, a0, s3 |
| ; RV32V-NEXT: and s1, a0, s1 |
| ; RV32V-NEXT: and t6, a0, t6 |
| ; RV32V-NEXT: and t4, a0, t4 |
| ; RV32V-NEXT: and t2, a0, t2 |
| ; RV32V-NEXT: and t0, a0, t0 |
| ; RV32V-NEXT: and a6, a0, a6 |
| ; RV32V-NEXT: and a4, a0, a4 |
| ; RV32V-NEXT: and a2, a0, a2 |
| ; RV32V-NEXT: and a0, a0, a1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a7 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s2 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, s1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t2 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, t0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v10, v8, a4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vx v8, v8, a2 |
| ; RV32V-NEXT: vxor.vv v8, v9, v8 |
| ; RV32V-NEXT: lw s0, 28(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 24(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 20(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 16(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 12(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 8(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 0(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 32 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i32_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e32, m1, ta, ma |
| ; RV64V-NEXT: vmv.v.x v9, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v9, v9, a0 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i32_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v10, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v8, v12 |
| ; RV32ZVBC64-NEXT: vclmul.vv v10, v10, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i32_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v12, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v10, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v8, v12 |
| ; RV64ZVBC64-NEXT: vclmul.vv v10, v10, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v10, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i32_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e32, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i32_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e32, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 |
| %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer |
| %v = call <vscale x 2 x i32> @llvm.clmul.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) |
| ret <vscale x 2 x i32> %v |
| } |
| |
; clmul_nxv4i32_vv: carry-less multiply of two <vscale x 4 x i32> vectors via
; @llvm.clmul.nxv4i32.
;
; Expected lowering per configuration, as recorded in the assertions below:
;  * plain +v (riscv32 and riscv64 show the same sequence): for each of the 32
;    bit positions, %vb is masked down to that single bit (vand.vi for masks
;    1, 2, 4 and 8; vand.vx with a materialized constant for the rest),
;    multiplied with %va (vmul.vv) and XOR-accumulated (vxor.vv), all at
;    e32/m2.
;  * +zvbc: both operands are zero-extended to e64/m4, multiplied with a single
;    vclmul.vv and narrowed back to e32/m2 with vnsrl.wi.
;  * +experimental-zvbc32e: a single vclmul.vv at e32/m2.
;
; The assertion lines are produced by utils/update_llc_test_checks.py (see the
; file header); regenerate them instead of editing them by hand.
| define <vscale x 4 x i32> @clmul_nxv4i32_vv(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i32_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e32, m2, ta, ma |
| ; RV32V-NEXT: vand.vi v12, v10, 2 |
| ; RV32V-NEXT: vand.vi v14, v10, 1 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v14, v12 |
| ; RV32V-NEXT: vand.vi v14, v10, 4 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vi v14, v10, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 16 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 32 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 64 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 128 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 256 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 512 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 1024 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 2048 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 4096 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 8192 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 32768 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 131072 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vand.vx v14, v10, a0 |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: vand.vx v10, v10, a0 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v12, v12, v14 |
| ; RV32V-NEXT: vmul.vv v8, v8, v10 |
| ; RV32V-NEXT: vxor.vv v8, v12, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i32_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e32, m2, ta, ma |
| ; RV64V-NEXT: vand.vi v12, v10, 2 |
| ; RV64V-NEXT: vand.vi v14, v10, 1 |
| ; RV64V-NEXT: vmul.vv v12, v8, v12 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v14, v12 |
| ; RV64V-NEXT: vand.vi v14, v10, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vi v14, v10, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v10, v10, a0 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vmul.vv v8, v8, v10 |
| ; RV64V-NEXT: vxor.vv v8, v12, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i32_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v12, v10 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v16, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v12, v16, v12 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i32_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v12, v10 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v16, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v12, v16, v12 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i32_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e32, m2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v10 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i32_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e32, m2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v10 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) |
| ret <vscale x 4 x i32> %v |
| } |
| |
| define <vscale x 4 x i32> @clmul_nxv4i32_vx(<vscale x 4 x i32> %va, i32 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i32_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -32 |
| ; RV32V-NEXT: sw s0, 28(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 24(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 20(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 16(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 12(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 8(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 0(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: andi s6, a0, 2 |
| ; RV32V-NEXT: andi s5, a0, 1 |
| ; RV32V-NEXT: andi s3, a0, 4 |
| ; RV32V-NEXT: andi s1, a0, 8 |
| ; RV32V-NEXT: andi t6, a0, 16 |
| ; RV32V-NEXT: andi t4, a0, 32 |
| ; RV32V-NEXT: andi t2, a0, 64 |
| ; RV32V-NEXT: andi t0, a0, 128 |
| ; RV32V-NEXT: andi a6, a0, 256 |
| ; RV32V-NEXT: andi a4, a0, 512 |
| ; RV32V-NEXT: andi a2, a0, 1024 |
| ; RV32V-NEXT: li a1, 1 |
| ; RV32V-NEXT: lui a3, 1 |
| ; RV32V-NEXT: lui a5, 2 |
| ; RV32V-NEXT: lui a7, 4 |
| ; RV32V-NEXT: lui t1, 8 |
| ; RV32V-NEXT: lui t3, 16 |
| ; RV32V-NEXT: lui t5, 32 |
| ; RV32V-NEXT: lui s0, 64 |
| ; RV32V-NEXT: lui s2, 128 |
| ; RV32V-NEXT: lui s4, 256 |
| ; RV32V-NEXT: vsetvli s7, zero, e32, m2, ta, ma |
| ; RV32V-NEXT: vmul.vx v10, v8, s6 |
| ; RV32V-NEXT: lui s6, 512 |
| ; RV32V-NEXT: vmul.vx v12, v8, s5 |
| ; RV32V-NEXT: lui s5, 1024 |
| ; RV32V-NEXT: vxor.vv v10, v12, v10 |
| ; RV32V-NEXT: vmul.vx v12, v8, s3 |
| ; RV32V-NEXT: lui s3, 2048 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s1 |
| ; RV32V-NEXT: lui s1, 4096 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t6 |
| ; RV32V-NEXT: lui t6, 8192 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t4 |
| ; RV32V-NEXT: lui t4, 16384 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t2 |
| ; RV32V-NEXT: lui t2, 32768 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t0 |
| ; RV32V-NEXT: lui t0, 65536 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a6 |
| ; RV32V-NEXT: lui a6, 131072 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a4 |
| ; RV32V-NEXT: lui a4, 262144 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a2 |
| ; RV32V-NEXT: lui a2, 524288 |
| ; RV32V-NEXT: slli a1, a1, 11 |
| ; RV32V-NEXT: and a3, a0, a3 |
| ; RV32V-NEXT: and a5, a0, a5 |
| ; RV32V-NEXT: and a7, a0, a7 |
| ; RV32V-NEXT: and t1, a0, t1 |
| ; RV32V-NEXT: and t3, a0, t3 |
| ; RV32V-NEXT: and t5, a0, t5 |
| ; RV32V-NEXT: and s0, a0, s0 |
| ; RV32V-NEXT: and s2, a0, s2 |
| ; RV32V-NEXT: and s4, a0, s4 |
| ; RV32V-NEXT: and s6, a0, s6 |
| ; RV32V-NEXT: and s5, a0, s5 |
| ; RV32V-NEXT: and s3, a0, s3 |
| ; RV32V-NEXT: and s1, a0, s1 |
| ; RV32V-NEXT: and t6, a0, t6 |
| ; RV32V-NEXT: and t4, a0, t4 |
| ; RV32V-NEXT: and t2, a0, t2 |
| ; RV32V-NEXT: and t0, a0, t0 |
| ; RV32V-NEXT: and a6, a0, a6 |
| ; RV32V-NEXT: and a4, a0, a4 |
| ; RV32V-NEXT: and a2, a0, a2 |
| ; RV32V-NEXT: and a0, a0, a1 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a0 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a3 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a5 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a7 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t1 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t3 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t5 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s0 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s2 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s4 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s6 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s5 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s3 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, s1 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t6 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t4 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t2 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, t0 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a6 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v12, v8, a4 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vx v8, v8, a2 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: lw s0, 28(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 24(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 20(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 16(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 12(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 8(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 0(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 32 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i32_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e32, m2, ta, ma |
| ; RV64V-NEXT: vmv.v.x v10, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v12, v10, 2 |
| ; RV64V-NEXT: vand.vi v14, v10, 1 |
| ; RV64V-NEXT: vmul.vv v12, v8, v12 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v14, v12 |
| ; RV64V-NEXT: vand.vi v14, v10, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vi v14, v10, 8 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v10, v10, a0 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vmul.vv v8, v8, v10 |
| ; RV64V-NEXT: vxor.vv v8, v12, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i32_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v12, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v8, v16 |
| ; RV32ZVBC64-NEXT: vclmul.vv v12, v12, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i32_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v12, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v8, v16 |
| ; RV64ZVBC64-NEXT: vclmul.vv v12, v12, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v12, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i32_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e32, m2, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i32_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e32, m2, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 |
| %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer |
| %v = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) |
| ret <vscale x 4 x i32> %v |
| } |
| |
| ; Carry-less multiply of two <vscale x 8 x i32> vector operands through |
| ; @llvm.clmul.nxv8i32. Expected lowering for each configuration under test |
| ; - With only +v (prefixes RV32V and RV64V) no vclmul is emitted. The |
| ;   operation is expanded at e32/m4 into one step per bit of %vb, with masks |
| ;   1, 2, 4, ... up to lui 524288 (bit 31). vand isolates the bit, vmul.vv |
| ;   multiplies %va by it and vxor.vv accumulates the partial products. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) both operands are |
| ;   zero-extended to e64/m8, multiplied with a single vclmul.vv and narrowed |
| ;   back to e32 with vnsrl.wi by 0. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) a single |
| ;   vclmul.vv at e32/m4 is expected. |
| define <vscale x 8 x i32> @clmul_nxv8i32_vv(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i32_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e32, m4, ta, ma |
| ; RV32V-NEXT: vand.vi v16, v12, 2 |
| ; RV32V-NEXT: vand.vi v20, v12, 1 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v20, v16 |
| ; RV32V-NEXT: vand.vi v20, v12, 4 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vi v20, v12, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 16 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 32 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 64 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 128 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 256 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 512 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 1024 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 2048 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 4096 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 8192 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 32768 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 131072 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, a0 |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: vand.vx v12, v12, a0 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vmul.vv v8, v8, v12 |
| ; RV32V-NEXT: vxor.vv v8, v16, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv8i32_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e32, m4, ta, ma |
| ; RV64V-NEXT: vand.vi v16, v12, 2 |
| ; RV64V-NEXT: vand.vi v20, v12, 1 |
| ; RV64V-NEXT: vmul.vv v16, v8, v16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v20, v16 |
| ; RV64V-NEXT: vand.vi v20, v12, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vi v20, v12, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v12, v12, a0 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vmul.vv v8, v8, v12 |
| ; RV64V-NEXT: vxor.vv v8, v16, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv8i32_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v16, v12 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v24, v8 |
| ; RV32ZVBC64-NEXT: vclmul.vv v16, v24, v16 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv8i32_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v16, v12 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v24, v8 |
| ; RV64ZVBC64-NEXT: vclmul.vv v16, v24, v16 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i32_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e32, m4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v12 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i32_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e32, m4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v12 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 8 x i32> @llvm.clmul.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Carry-less multiply of a <vscale x 8 x i32> vector by a scalar i32 %b. The |
| ; scalar is splatted with the insertelement + shufflevector idiom before the |
| ; @llvm.clmul.nxv8i32 call. Expected lowering for each configuration under test |
| ; - With only +v the two targets differ. On riscv32 (prefix RV32V) each bit of |
| ;   %b is masked out in scalar registers (andi, or lui followed by and) and |
| ;   fed to vmul.vx. That sequence uses s0-s7, hence the 32-byte spill frame. |
| ;   On riscv64 (prefix RV64V) %b is first splatted with vmv.v.x and the |
| ;   per-bit masking is done on the vector (vand.vi / vand.vx, then vmul.vv). |
| ;   In both cases vxor.vv accumulates the partial products. |
| ; - With +zvbc (prefixes RV32ZVBC64 and RV64ZVBC64) the splat and %va are |
| ;   zero-extended to e64/m8, multiplied with vclmul.vv and narrowed back to |
| ;   e32 with vnsrl.wi by 0. |
| ; - With +experimental-zvbc32e (prefixes RV32ZVBC32 and RV64ZVBC32) the scalar |
| ;   is consumed directly by a single vclmul.vx at e32/m4. |
| define <vscale x 8 x i32> @clmul_nxv8i32_vx(<vscale x 8 x i32> %va, i32 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i32_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -32 |
| ; RV32V-NEXT: sw s0, 28(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 24(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 20(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 16(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 12(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 8(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 0(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: andi s6, a0, 2 |
| ; RV32V-NEXT: andi s5, a0, 1 |
| ; RV32V-NEXT: andi s3, a0, 4 |
| ; RV32V-NEXT: andi s1, a0, 8 |
| ; RV32V-NEXT: andi t6, a0, 16 |
| ; RV32V-NEXT: andi t4, a0, 32 |
| ; RV32V-NEXT: andi t2, a0, 64 |
| ; RV32V-NEXT: andi t0, a0, 128 |
| ; RV32V-NEXT: andi a6, a0, 256 |
| ; RV32V-NEXT: andi a4, a0, 512 |
| ; RV32V-NEXT: andi a2, a0, 1024 |
| ; RV32V-NEXT: li a1, 1 |
| ; RV32V-NEXT: lui a3, 1 |
| ; RV32V-NEXT: lui a5, 2 |
| ; RV32V-NEXT: lui a7, 4 |
| ; RV32V-NEXT: lui t1, 8 |
| ; RV32V-NEXT: lui t3, 16 |
| ; RV32V-NEXT: lui t5, 32 |
| ; RV32V-NEXT: lui s0, 64 |
| ; RV32V-NEXT: lui s2, 128 |
| ; RV32V-NEXT: lui s4, 256 |
| ; RV32V-NEXT: vsetvli s7, zero, e32, m4, ta, ma |
| ; RV32V-NEXT: vmul.vx v12, v8, s6 |
| ; RV32V-NEXT: lui s6, 512 |
| ; RV32V-NEXT: vmul.vx v16, v8, s5 |
| ; RV32V-NEXT: lui s5, 1024 |
| ; RV32V-NEXT: vxor.vv v12, v16, v12 |
| ; RV32V-NEXT: vmul.vx v16, v8, s3 |
| ; RV32V-NEXT: lui s3, 2048 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s1 |
| ; RV32V-NEXT: lui s1, 4096 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t6 |
| ; RV32V-NEXT: lui t6, 8192 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t4 |
| ; RV32V-NEXT: lui t4, 16384 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t2 |
| ; RV32V-NEXT: lui t2, 32768 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t0 |
| ; RV32V-NEXT: lui t0, 65536 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a6 |
| ; RV32V-NEXT: lui a6, 131072 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a4 |
| ; RV32V-NEXT: lui a4, 262144 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a2 |
| ; RV32V-NEXT: lui a2, 524288 |
| ; RV32V-NEXT: slli a1, a1, 11 |
| ; RV32V-NEXT: and a3, a0, a3 |
| ; RV32V-NEXT: and a5, a0, a5 |
| ; RV32V-NEXT: and a7, a0, a7 |
| ; RV32V-NEXT: and t1, a0, t1 |
| ; RV32V-NEXT: and t3, a0, t3 |
| ; RV32V-NEXT: and t5, a0, t5 |
| ; RV32V-NEXT: and s0, a0, s0 |
| ; RV32V-NEXT: and s2, a0, s2 |
| ; RV32V-NEXT: and s4, a0, s4 |
| ; RV32V-NEXT: and s6, a0, s6 |
| ; RV32V-NEXT: and s5, a0, s5 |
| ; RV32V-NEXT: and s3, a0, s3 |
| ; RV32V-NEXT: and s1, a0, s1 |
| ; RV32V-NEXT: and t6, a0, t6 |
| ; RV32V-NEXT: and t4, a0, t4 |
| ; RV32V-NEXT: and t2, a0, t2 |
| ; RV32V-NEXT: and t0, a0, t0 |
| ; RV32V-NEXT: and a6, a0, a6 |
| ; RV32V-NEXT: and a4, a0, a4 |
| ; RV32V-NEXT: and a2, a0, a2 |
| ; RV32V-NEXT: and a0, a0, a1 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a0 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a3 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a5 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a7 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t1 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t3 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t5 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s0 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s2 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s4 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s6 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s5 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s3 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, s1 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t6 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t4 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t2 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, t0 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a6 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v16, v8, a4 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vx v8, v8, a2 |
| ; RV32V-NEXT: vxor.vv v8, v12, v8 |
| ; RV32V-NEXT: lw s0, 28(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 24(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 20(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 16(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 12(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 8(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 0(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 32 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv8i32_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e32, m4, ta, ma |
| ; RV64V-NEXT: vmv.v.x v12, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v16, v12, 2 |
| ; RV64V-NEXT: vand.vi v20, v12, 1 |
| ; RV64V-NEXT: vmul.vv v16, v8, v16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v20, v16 |
| ; RV64V-NEXT: vand.vi v20, v12, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vi v20, v12, 8 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v12, v12, a0 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vmul.vv v8, v8, v12 |
| ; RV64V-NEXT: vxor.vv v8, v16, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv8i32_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e32, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vmv.v.x v24, a0 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e64, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vzext.vf2 v16, v8 |
| ; RV32ZVBC64-NEXT: vzext.vf2 v8, v24 |
| ; RV32ZVBC64-NEXT: vclmul.vv v16, v16, v8 |
| ; RV32ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv8i32_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e32, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v24, a0 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vzext.vf2 v16, v8 |
| ; RV64ZVBC64-NEXT: vzext.vf2 v8, v24 |
| ; RV64ZVBC64-NEXT: vclmul.vv v16, v16, v8 |
| ; RV64ZVBC64-NEXT: vsetvli zero, zero, e32, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vnsrl.wi v8, v16, 0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i32_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e32, m4, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i32_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e32, m4, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 |
| %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer |
| %v = call <vscale x 8 x i32> @llvm.clmul.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) |
| ret <vscale x 8 x i32> %v |
| } |
| |
| ; Vector-vector carry-less multiply of two <vscale x 16 x i32> operands through |
| ; the @llvm.clmul.nxv16i32 intrinsic. What the assertions below expect: |
| ; - with +v,+experimental-zvbc32e (both riscv32 and riscv64): one e32/m8 |
| ; vclmul.vv on v8 and v16; |
| ; - with +v alone, or with +v,+zvbc (both riscv32 and riscv64): a bit-by-bit |
| ; expansion at e32/m8 that isolates each of the 32 bits of v16 with vand, |
| ; multiplies v8 by the isolated bit with vmul.vv, and folds the partial |
| ; products together with vxor.vv. |
| ; NOTE(review): +zvbc presumably cannot be used here because widening the e32/m8 |
| ; operands to e64 would need a register group larger than m8 -- confirm against |
| ; the lowering code. |
| define <vscale x 16 x i32> @clmul_nxv16i32_vv(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv16i32_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| ; RV32V-NEXT: vand.vi v24, v16, 2 |
| ; RV32V-NEXT: vand.vi v0, v16, 1 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v0, v24 |
| ; RV32V-NEXT: vand.vi v0, v16, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vi v0, v16, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 32 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 256 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: li a0, 1 |
| ; RV32V-NEXT: slli a0, a0, 11 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 16 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 32 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 64 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 256 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 512 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 1024 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 2048 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 4096 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 8192 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 32768 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 131072 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: vand.vx v16, v16, a0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vmul.vv v8, v8, v16 |
| ; RV32V-NEXT: vxor.vv v8, v24, v8 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv16i32_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| ; RV64V-NEXT: vand.vi v24, v16, 2 |
| ; RV64V-NEXT: vand.vi v0, v16, 1 |
| ; RV64V-NEXT: vmul.vv v24, v8, v24 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v0, v24 |
| ; RV64V-NEXT: vand.vi v0, v16, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vi v0, v16, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v16, v16, a0 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vmul.vv v8, v8, v16 |
| ; RV64V-NEXT: vxor.vv v8, v24, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv16i32_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV32ZVBC64-NEXT: li a0, 16 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 128 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 256 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 512 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1024 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: li a0, 1 |
| ; RV32ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 1 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 2 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 4 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 8 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 16 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 32 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 64 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 128 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 256 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 512 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 1024 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 2048 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 4096 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 8192 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 16384 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 32768 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 65536 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 131072 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 262144 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC64-NEXT: lui a0, 524288 |
| ; RV32ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV32ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv16i32_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1 |
| ; RV64ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 16 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2048 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4096 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8192 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 16384 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 32768 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 65536 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 131072 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 262144 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 524288 |
| ; RV64ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv16i32_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vv v8, v8, v16 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv16i32_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e32, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vv v8, v8, v16 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 16 x i32> @llvm.clmul.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| define <vscale x 16 x i32> @clmul_nxv16i32_vx(<vscale x 16 x i32> %va, i32 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv16i32_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -32 |
| ; RV32V-NEXT: sw s0, 28(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 24(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 20(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 16(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 12(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 8(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 0(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: andi s6, a0, 2 |
| ; RV32V-NEXT: andi s5, a0, 1 |
| ; RV32V-NEXT: andi s3, a0, 4 |
| ; RV32V-NEXT: andi s1, a0, 8 |
| ; RV32V-NEXT: andi t6, a0, 16 |
| ; RV32V-NEXT: andi t4, a0, 32 |
| ; RV32V-NEXT: andi t2, a0, 64 |
| ; RV32V-NEXT: andi t0, a0, 128 |
| ; RV32V-NEXT: andi a6, a0, 256 |
| ; RV32V-NEXT: andi a4, a0, 512 |
| ; RV32V-NEXT: andi a2, a0, 1024 |
| ; RV32V-NEXT: li a1, 1 |
| ; RV32V-NEXT: lui a3, 1 |
| ; RV32V-NEXT: lui a5, 2 |
| ; RV32V-NEXT: lui a7, 4 |
| ; RV32V-NEXT: lui t1, 8 |
| ; RV32V-NEXT: lui t3, 16 |
| ; RV32V-NEXT: lui t5, 32 |
| ; RV32V-NEXT: lui s0, 64 |
| ; RV32V-NEXT: lui s2, 128 |
| ; RV32V-NEXT: lui s4, 256 |
| ; RV32V-NEXT: vsetvli s7, zero, e32, m8, ta, ma |
| ; RV32V-NEXT: vmul.vx v16, v8, s6 |
| ; RV32V-NEXT: lui s6, 512 |
| ; RV32V-NEXT: vmul.vx v24, v8, s5 |
| ; RV32V-NEXT: lui s5, 1024 |
| ; RV32V-NEXT: vxor.vv v16, v24, v16 |
| ; RV32V-NEXT: vmul.vx v24, v8, s3 |
| ; RV32V-NEXT: lui s3, 2048 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s1 |
| ; RV32V-NEXT: lui s1, 4096 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t6 |
| ; RV32V-NEXT: lui t6, 8192 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t4 |
| ; RV32V-NEXT: lui t4, 16384 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t2 |
| ; RV32V-NEXT: lui t2, 32768 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t0 |
| ; RV32V-NEXT: lui t0, 65536 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a6 |
| ; RV32V-NEXT: lui a6, 131072 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a4 |
| ; RV32V-NEXT: lui a4, 262144 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a2 |
| ; RV32V-NEXT: lui a2, 524288 |
| ; RV32V-NEXT: slli a1, a1, 11 |
| ; RV32V-NEXT: and a3, a0, a3 |
| ; RV32V-NEXT: and a5, a0, a5 |
| ; RV32V-NEXT: and a7, a0, a7 |
| ; RV32V-NEXT: and t1, a0, t1 |
| ; RV32V-NEXT: and t3, a0, t3 |
| ; RV32V-NEXT: and t5, a0, t5 |
| ; RV32V-NEXT: and s0, a0, s0 |
| ; RV32V-NEXT: and s2, a0, s2 |
| ; RV32V-NEXT: and s4, a0, s4 |
| ; RV32V-NEXT: and s6, a0, s6 |
| ; RV32V-NEXT: and s5, a0, s5 |
| ; RV32V-NEXT: and s3, a0, s3 |
| ; RV32V-NEXT: and s1, a0, s1 |
| ; RV32V-NEXT: and t6, a0, t6 |
| ; RV32V-NEXT: and t4, a0, t4 |
| ; RV32V-NEXT: and t2, a0, t2 |
| ; RV32V-NEXT: and t0, a0, t0 |
| ; RV32V-NEXT: and a6, a0, a6 |
| ; RV32V-NEXT: and a4, a0, a4 |
| ; RV32V-NEXT: and a2, a0, a2 |
| ; RV32V-NEXT: and a0, a0, a1 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a0 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a3 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a5 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a7 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t1 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t3 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t5 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s0 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s2 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s4 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s6 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s5 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s3 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, s1 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t6 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t4 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t2 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, t0 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a6 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v24, v8, a4 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vx v8, v8, a2 |
| ; RV32V-NEXT: vxor.vv v8, v16, v8 |
| ; RV32V-NEXT: lw s0, 28(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 24(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 20(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 16(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 12(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 8(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 0(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 32 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv16i32_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a1, zero, e32, m8, ta, ma |
| ; RV64V-NEXT: vmv.v.x v16, a0 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: vand.vi v24, v16, 2 |
| ; RV64V-NEXT: vand.vi v0, v16, 1 |
| ; RV64V-NEXT: vmul.vv v24, v8, v24 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v0, v24 |
| ; RV64V-NEXT: vand.vi v0, v16, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vi v0, v16, 8 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: slli a0, a0, 11 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 2 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 8 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 16 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 2048 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 4096 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 8192 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 16384 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 32768 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 65536 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 131072 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 262144 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: lui a0, 524288 |
| ; RV64V-NEXT: vand.vx v16, v16, a0 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vmul.vv v8, v8, v16 |
| ; RV64V-NEXT: vxor.vv v8, v24, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv16i32_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: addi sp, sp, -32 |
| ; RV32ZVBC64-NEXT: sw s0, 28(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s1, 24(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s2, 20(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s3, 16(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s4, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s5, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s6, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: sw s7, 0(sp) # 4-byte Folded Spill |
| ; RV32ZVBC64-NEXT: andi s6, a0, 2 |
| ; RV32ZVBC64-NEXT: andi s5, a0, 1 |
| ; RV32ZVBC64-NEXT: andi s3, a0, 4 |
| ; RV32ZVBC64-NEXT: andi s1, a0, 8 |
| ; RV32ZVBC64-NEXT: andi t6, a0, 16 |
| ; RV32ZVBC64-NEXT: andi t4, a0, 32 |
| ; RV32ZVBC64-NEXT: andi t2, a0, 64 |
| ; RV32ZVBC64-NEXT: andi t0, a0, 128 |
| ; RV32ZVBC64-NEXT: andi a6, a0, 256 |
| ; RV32ZVBC64-NEXT: andi a4, a0, 512 |
| ; RV32ZVBC64-NEXT: andi a2, a0, 1024 |
| ; RV32ZVBC64-NEXT: li a1, 1 |
| ; RV32ZVBC64-NEXT: lui a3, 1 |
| ; RV32ZVBC64-NEXT: lui a5, 2 |
| ; RV32ZVBC64-NEXT: lui a7, 4 |
| ; RV32ZVBC64-NEXT: lui t1, 8 |
| ; RV32ZVBC64-NEXT: lui t3, 16 |
| ; RV32ZVBC64-NEXT: lui t5, 32 |
| ; RV32ZVBC64-NEXT: lui s0, 64 |
| ; RV32ZVBC64-NEXT: lui s2, 128 |
| ; RV32ZVBC64-NEXT: lui s4, 256 |
| ; RV32ZVBC64-NEXT: vsetvli s7, zero, e32, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vmul.vx v16, v8, s6 |
| ; RV32ZVBC64-NEXT: lui s6, 512 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s5 |
| ; RV32ZVBC64-NEXT: lui s5, 1024 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v24, v16 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s3 |
| ; RV32ZVBC64-NEXT: lui s3, 2048 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s1 |
| ; RV32ZVBC64-NEXT: lui s1, 4096 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t6 |
| ; RV32ZVBC64-NEXT: lui t6, 8192 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t4 |
| ; RV32ZVBC64-NEXT: lui t4, 16384 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t2 |
| ; RV32ZVBC64-NEXT: lui t2, 32768 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t0 |
| ; RV32ZVBC64-NEXT: lui t0, 65536 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a6 |
| ; RV32ZVBC64-NEXT: lui a6, 131072 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a4 |
| ; RV32ZVBC64-NEXT: lui a4, 262144 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a2 |
| ; RV32ZVBC64-NEXT: lui a2, 524288 |
| ; RV32ZVBC64-NEXT: slli a1, a1, 11 |
| ; RV32ZVBC64-NEXT: and a3, a0, a3 |
| ; RV32ZVBC64-NEXT: and a5, a0, a5 |
| ; RV32ZVBC64-NEXT: and a7, a0, a7 |
| ; RV32ZVBC64-NEXT: and t1, a0, t1 |
| ; RV32ZVBC64-NEXT: and t3, a0, t3 |
| ; RV32ZVBC64-NEXT: and t5, a0, t5 |
| ; RV32ZVBC64-NEXT: and s0, a0, s0 |
| ; RV32ZVBC64-NEXT: and s2, a0, s2 |
| ; RV32ZVBC64-NEXT: and s4, a0, s4 |
| ; RV32ZVBC64-NEXT: and s6, a0, s6 |
| ; RV32ZVBC64-NEXT: and s5, a0, s5 |
| ; RV32ZVBC64-NEXT: and s3, a0, s3 |
| ; RV32ZVBC64-NEXT: and s1, a0, s1 |
| ; RV32ZVBC64-NEXT: and t6, a0, t6 |
| ; RV32ZVBC64-NEXT: and t4, a0, t4 |
| ; RV32ZVBC64-NEXT: and t2, a0, t2 |
| ; RV32ZVBC64-NEXT: and t0, a0, t0 |
| ; RV32ZVBC64-NEXT: and a6, a0, a6 |
| ; RV32ZVBC64-NEXT: and a4, a0, a4 |
| ; RV32ZVBC64-NEXT: and a2, a0, a2 |
| ; RV32ZVBC64-NEXT: and a0, a0, a1 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a0 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a3 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a5 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a7 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t1 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t3 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t5 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s0 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s2 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s4 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s6 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s5 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s3 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, s1 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t6 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t4 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t2 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, t0 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a6 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v24, v8, a4 |
| ; RV32ZVBC64-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC64-NEXT: vmul.vx v8, v8, a2 |
| ; RV32ZVBC64-NEXT: vxor.vv v8, v16, v8 |
| ; RV32ZVBC64-NEXT: lw s0, 28(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s1, 24(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s2, 20(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s3, 16(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s4, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s5, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s6, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: lw s7, 0(sp) # 4-byte Folded Reload |
| ; RV32ZVBC64-NEXT: addi sp, sp, 32 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv16i32_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e32, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vmv.v.x v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 16 |
| ; RV64ZVBC64-NEXT: vand.vi v24, v16, 2 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v24, v8, v24 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v0, v24 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vi v0, v16, 8 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: li a0, 1 |
| ; RV64ZVBC64-NEXT: slli a0, a0, 11 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 16 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 32 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 64 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 128 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 256 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 512 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 1024 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 2048 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 4096 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 8192 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 16384 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 32768 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 65536 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 131072 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 262144 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC64-NEXT: lui a0, 524288 |
| ; RV64ZVBC64-NEXT: vand.vx v16, v16, a0 |
| ; RV64ZVBC64-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC64-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC64-NEXT: vmul.vv v8, v8, v16 |
| ; RV64ZVBC64-NEXT: vxor.vv v8, v24, v8 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv16i32_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e32, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv16i32_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a1, zero, e32, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 |
| %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer |
| %v = call <vscale x 16 x i32> @llvm.clmul.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb) |
| ret <vscale x 16 x i32> %v |
| } |
| |
| define <vscale x 1 x i64> @clmul_nxv1i64_vv(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i64_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 3 |
| ; RV32V-NEXT: sub a0, a1, a0 |
| ; RV32V-NEXT: sub sp, sp, a0 |
| ; RV32V-NEXT: lui a1, 524288 |
| ; RV32V-NEXT: li t5, 1 |
| ; RV32V-NEXT: li a4, 2 |
| ; RV32V-NEXT: li a2, 4 |
| ; RV32V-NEXT: li s11, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: li ra, 32 |
| ; RV32V-NEXT: li s10, 64 |
| ; RV32V-NEXT: li s9, 128 |
| ; RV32V-NEXT: li s8, 256 |
| ; RV32V-NEXT: li s7, 512 |
| ; RV32V-NEXT: li s1, 1024 |
| ; RV32V-NEXT: lui s6, 1 |
| ; RV32V-NEXT: lui s5, 2 |
| ; RV32V-NEXT: lui s4, 4 |
| ; RV32V-NEXT: lui s3, 8 |
| ; RV32V-NEXT: lui s2, 16 |
| ; RV32V-NEXT: lui s0, 32 |
| ; RV32V-NEXT: lui t6, 64 |
| ; RV32V-NEXT: lui t4, 128 |
| ; RV32V-NEXT: lui t3, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a6, 8192 |
| ; RV32V-NEXT: lui a5, 16384 |
| ; RV32V-NEXT: lui a3, 32768 |
| ; RV32V-NEXT: sw a1, 16(sp) |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw t5, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a4, 268(sp) |
| ; RV32V-NEXT: lui a4, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw a2, 260(sp) |
| ; RV32V-NEXT: lui a2, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s11, 252(sp) |
| ; RV32V-NEXT: vsetvli s11, zero, e64, m1, ta, ma |
| ; RV32V-NEXT: vand.vi v13, v9, 2 |
| ; RV32V-NEXT: vand.vi v14, v9, 1 |
| ; RV32V-NEXT: vand.vi v12, v9, 4 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: sw a0, 244(sp) |
| ; RV32V-NEXT: vand.vx v10, v9, a0 |
| ; RV32V-NEXT: addi s11, sp, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: sw ra, 236(sp) |
| ; RV32V-NEXT: vand.vx v15, v9, ra |
| ; RV32V-NEXT: addi ra, sp, 272 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw s10, 228(sp) |
| ; RV32V-NEXT: vand.vx v16, v9, s10 |
| ; RV32V-NEXT: addi s10, sp, 264 |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s9, 220(sp) |
| ; RV32V-NEXT: vand.vx v17, v9, s9 |
| ; RV32V-NEXT: addi s9, sp, 256 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s8, 212(sp) |
| ; RV32V-NEXT: vand.vx v18, v9, s8 |
| ; RV32V-NEXT: addi s8, sp, 248 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s7, 204(sp) |
| ; RV32V-NEXT: vand.vx v19, v9, s7 |
| ; RV32V-NEXT: addi s7, sp, 240 |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s1, 196(sp) |
| ; RV32V-NEXT: vand.vx v20, v9, s1 |
| ; RV32V-NEXT: slli t5, t5, 11 |
| ; RV32V-NEXT: vand.vx v21, v9, s6 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw t5, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s6, 180(sp) |
| ; RV32V-NEXT: addi s6, sp, 224 |
| ; RV32V-NEXT: vand.vx v22, v9, s5 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s5, 172(sp) |
| ; RV32V-NEXT: addi s5, sp, 216 |
| ; RV32V-NEXT: vand.vx v23, v9, s4 |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s4, 164(sp) |
| ; RV32V-NEXT: addi s4, sp, 208 |
| ; RV32V-NEXT: vand.vx v24, v9, s3 |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s3, 156(sp) |
| ; RV32V-NEXT: addi s3, sp, 200 |
| ; RV32V-NEXT: vand.vx v25, v9, s2 |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s2, 148(sp) |
| ; RV32V-NEXT: addi s2, sp, 192 |
| ; RV32V-NEXT: vand.vx v26, v9, s0 |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw s0, 140(sp) |
| ; RV32V-NEXT: addi s1, sp, 184 |
| ; RV32V-NEXT: vand.vx v27, v9, t6 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t6, 132(sp) |
| ; RV32V-NEXT: addi s0, sp, 176 |
| ; RV32V-NEXT: vand.vx v28, v9, t4 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t4, 124(sp) |
| ; RV32V-NEXT: addi t6, sp, 168 |
| ; RV32V-NEXT: vand.vx v29, v9, t3 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t3, 116(sp) |
| ; RV32V-NEXT: addi t4, sp, 160 |
| ; RV32V-NEXT: vand.vx v30, v9, t2 |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: addi t3, sp, 152 |
| ; RV32V-NEXT: vand.vx v31, v9, t1 |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: addi t2, sp, 144 |
| ; RV32V-NEXT: vand.vx v7, v9, t0 |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: addi t1, sp, 136 |
| ; RV32V-NEXT: vand.vx v6, v9, a7 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: addi t0, sp, 128 |
| ; RV32V-NEXT: vand.vx v5, v9, a6 |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a6, 76(sp) |
| ; RV32V-NEXT: addi a7, sp, 120 |
| ; RV32V-NEXT: vand.vx v4, v9, a5 |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a5, 68(sp) |
| ; RV32V-NEXT: addi a6, sp, 112 |
| ; RV32V-NEXT: vand.vx v3, v9, a3 |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a3, 60(sp) |
| ; RV32V-NEXT: addi a5, sp, 104 |
| ; RV32V-NEXT: vand.vx v2, v9, a4 |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a4, 52(sp) |
| ; RV32V-NEXT: addi a4, sp, 96 |
| ; RV32V-NEXT: vand.vx v1, v9, a2 |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a2, 44(sp) |
| ; RV32V-NEXT: addi a3, sp, 88 |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: sw a0, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw a1, 28(sp) |
| ; RV32V-NEXT: addi a2, sp, 80 |
| ; RV32V-NEXT: vand.vx v0, v9, t5 |
| ; RV32V-NEXT: addi a1, sp, 72 |
| ; RV32V-NEXT: vmul.vv v13, v8, v13 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v14, v14, v13 |
| ; RV32V-NEXT: vlse64.v v13, (s11), zero |
| ; RV32V-NEXT: addi s11, sp, 64 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v14, v14, v12 |
| ; RV32V-NEXT: vlse64.v v12, (ra), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: mv ra, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, ra |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs1r.v v12, (t5) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi ra, sp, 56 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v14, v14, v11 |
| ; RV32V-NEXT: vlse64.v v11, (s10), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli s10, t5, 2 |
| ; RV32V-NEXT: add t5, s10, t5 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi s10, sp, 48 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v14, v14, v10 |
| ; RV32V-NEXT: vlse64.v v10, (s9), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs1r.v v10, (t5) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi t5, sp, 40 |
| ; RV32V-NEXT: vmul.vv v15, v8, v15 |
| ; RV32V-NEXT: vxor.vv v15, v14, v15 |
| ; RV32V-NEXT: vlse64.v v10, (s8), zero |
| ; RV32V-NEXT: csrr s8, vlenb |
| ; RV32V-NEXT: slli s9, s8, 1 |
| ; RV32V-NEXT: add s8, s9, s8 |
| ; RV32V-NEXT: add s8, sp, s8 |
| ; RV32V-NEXT: addi s8, s8, 288 |
| ; RV32V-NEXT: vs1r.v v10, (s8) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi s8, sp, 32 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v16, v15, v16 |
| ; RV32V-NEXT: vlse64.v v10, (s7), zero |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs1r.v v10, (s7) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi s7, sp, 24 |
| ; RV32V-NEXT: vmul.vv v17, v8, v17 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vmul.vv v19, v8, v19 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vmul.vv v21, v8, v21 |
| ; RV32V-NEXT: vmul.vv v22, v8, v22 |
| ; RV32V-NEXT: vmul.vv v23, v8, v23 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v25, v8, v25 |
| ; RV32V-NEXT: vmul.vv v26, v8, v26 |
| ; RV32V-NEXT: vmul.vv v27, v8, v27 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vmul.vv v29, v8, v29 |
| ; RV32V-NEXT: vmul.vv v30, v8, v30 |
| ; RV32V-NEXT: vmul.vv v31, v8, v31 |
| ; RV32V-NEXT: vmul.vv v7, v8, v7 |
| ; RV32V-NEXT: vmul.vv v6, v8, v6 |
| ; RV32V-NEXT: vmul.vv v5, v8, v5 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vmul.vv v3, v8, v3 |
| ; RV32V-NEXT: vmul.vv v2, v8, v2 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v16, v16, v17 |
| ; RV32V-NEXT: addi s9, sp, 232 |
| ; RV32V-NEXT: vlse64.v v11, (s9), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vlse64.v v10, (s6), zero |
| ; RV32V-NEXT: csrr s6, vlenb |
| ; RV32V-NEXT: add s6, sp, s6 |
| ; RV32V-NEXT: addi s6, s6, 288 |
| ; RV32V-NEXT: vs1r.v v10, (s6) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vxor.vv v16, v16, v19 |
| ; RV32V-NEXT: vlse64.v v10, (s5), zero |
| ; RV32V-NEXT: addi s5, sp, 288 |
| ; RV32V-NEXT: vs1r.v v10, (s5) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vlse64.v v12, (s4), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v0 |
| ; RV32V-NEXT: vlse64.v v0, (s3), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v21 |
| ; RV32V-NEXT: vlse64.v v21, (s2), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v22 |
| ; RV32V-NEXT: vlse64.v v22, (s1), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v23 |
| ; RV32V-NEXT: vlse64.v v23, (s0), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vlse64.v v24, (t6), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v25 |
| ; RV32V-NEXT: vlse64.v v25, (t4), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v26 |
| ; RV32V-NEXT: vlse64.v v26, (t3), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v27 |
| ; RV32V-NEXT: vlse64.v v27, (t2), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v28 |
| ; RV32V-NEXT: vlse64.v v28, (t1), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v29 |
| ; RV32V-NEXT: vlse64.v v29, (t0), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v30 |
| ; RV32V-NEXT: vlse64.v v30, (a7), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v31 |
| ; RV32V-NEXT: vlse64.v v31, (a6), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v7 |
| ; RV32V-NEXT: vlse64.v v7, (a5), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v6 |
| ; RV32V-NEXT: vlse64.v v6, (a4), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v5 |
| ; RV32V-NEXT: vlse64.v v5, (a3), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v4 |
| ; RV32V-NEXT: vlse64.v v4, (a2), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v3 |
| ; RV32V-NEXT: vlse64.v v3, (a1), zero |
| ; RV32V-NEXT: vxor.vv v16, v16, v2 |
| ; RV32V-NEXT: vlse64.v v2, (s11), zero |
| ; RV32V-NEXT: vxor.vv v1, v16, v1 |
| ; RV32V-NEXT: vlse64.v v10, (ra), zero |
| ; RV32V-NEXT: vand.vv v13, v9, v13 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v14, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v14, v9, v14 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a2, a1, 2 |
| ; RV32V-NEXT: add a1, a2, a1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v15, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v15, v9, v15 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v16, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v16, v9, v16 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a2, a1, 1 |
| ; RV32V-NEXT: add a1, a2, a1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v17, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v17, v9, v17 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v18, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v18, v9, v18 |
| ; RV32V-NEXT: vand.vv v19, v9, v11 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v20, v9, v11 |
| ; RV32V-NEXT: addi a1, sp, 288 |
| ; RV32V-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v11, v9, v11 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v11, v9, v12 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a2, a1, 1 |
| ; RV32V-NEXT: add a1, a2, a1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v0, v9, v0 |
| ; RV32V-NEXT: vand.vv v21, v9, v21 |
| ; RV32V-NEXT: vand.vv v22, v9, v22 |
| ; RV32V-NEXT: vand.vv v23, v9, v23 |
| ; RV32V-NEXT: vand.vv v24, v9, v24 |
| ; RV32V-NEXT: vand.vv v25, v9, v25 |
| ; RV32V-NEXT: vand.vv v26, v9, v26 |
| ; RV32V-NEXT: vand.vv v27, v9, v27 |
| ; RV32V-NEXT: vand.vv v28, v9, v28 |
| ; RV32V-NEXT: vand.vv v29, v9, v29 |
| ; RV32V-NEXT: vand.vv v30, v9, v30 |
| ; RV32V-NEXT: vand.vv v31, v9, v31 |
| ; RV32V-NEXT: vand.vv v7, v9, v7 |
| ; RV32V-NEXT: vand.vv v6, v9, v6 |
| ; RV32V-NEXT: vand.vv v5, v9, v5 |
| ; RV32V-NEXT: vand.vv v4, v9, v4 |
| ; RV32V-NEXT: vand.vv v11, v9, v3 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v2, v9, v2 |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v10, (s10), zero |
| ; RV32V-NEXT: vlse64.v v3, (t5), zero |
| ; RV32V-NEXT: vlse64.v v11, (s8), zero |
| ; RV32V-NEXT: vlse64.v v12, (s7), zero |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: vand.vv v3, v9, v3 |
| ; RV32V-NEXT: vand.vv v11, v9, v11 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a2, a1, 2 |
| ; RV32V-NEXT: add a1, a2, a1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v9, v12 |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v9, v8, v9 |
| ; RV32V-NEXT: vxor.vv v9, v1, v9 |
| ; RV32V-NEXT: vmul.vv v11, v8, v13 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v14 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v15 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v16 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v17 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v18 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v19 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v20 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 1 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v21 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v22 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v23 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v24 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v25 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v26 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v27 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v28 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v29 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v30 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v31 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v7 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v11, v8, v2 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v9, v9, v11 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 2 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v8, v8, v12 |
| ; RV32V-NEXT: vxor.vv v8, v9, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 3 |
| ; RV32V-NEXT: sub a0, a1, a0 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i64_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: li a1, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a1, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a1, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a2, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a2 |
| ; RV64V-NEXT: slli a1, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 2048 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 4096 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 8192 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 16384 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 32768 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 65536 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 131072 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 262144 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 31 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 33 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 34 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 35 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 36 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 37 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 38 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 39 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 40 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 41 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 42 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 43 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 44 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 45 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 46 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 47 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 48 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 49 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 50 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 51 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 52 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 53 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 54 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 55 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 56 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 57 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 58 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 59 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 60 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 61 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a1, -1 |
| ; RV64V-NEXT: slli a0, a0, 62 |
| ; RV64V-NEXT: slli a1, a1, 63 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: vand.vx v9, v9, a1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v8, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i64_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i64_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v8, v9 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i64_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 3 |
| ; RV32ZVBC32-NEXT: sub a0, a1, a0 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lui a1, 524288 |
| ; RV32ZVBC32-NEXT: li t5, 1 |
| ; RV32ZVBC32-NEXT: li a4, 2 |
| ; RV32ZVBC32-NEXT: li a2, 4 |
| ; RV32ZVBC32-NEXT: li s11, 8 |
| ; RV32ZVBC32-NEXT: li a0, 16 |
| ; RV32ZVBC32-NEXT: li ra, 32 |
| ; RV32ZVBC32-NEXT: li s10, 64 |
| ; RV32ZVBC32-NEXT: li s9, 128 |
| ; RV32ZVBC32-NEXT: li s8, 256 |
| ; RV32ZVBC32-NEXT: li s7, 512 |
| ; RV32ZVBC32-NEXT: li s1, 1024 |
| ; RV32ZVBC32-NEXT: lui s6, 1 |
| ; RV32ZVBC32-NEXT: lui s5, 2 |
| ; RV32ZVBC32-NEXT: lui s4, 4 |
| ; RV32ZVBC32-NEXT: lui s3, 8 |
| ; RV32ZVBC32-NEXT: lui s2, 16 |
| ; RV32ZVBC32-NEXT: lui s0, 32 |
| ; RV32ZVBC32-NEXT: lui t6, 64 |
| ; RV32ZVBC32-NEXT: lui t4, 128 |
| ; RV32ZVBC32-NEXT: lui t3, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a6, 8192 |
| ; RV32ZVBC32-NEXT: lui a5, 16384 |
| ; RV32ZVBC32-NEXT: lui a3, 32768 |
| ; RV32ZVBC32-NEXT: sw a1, 16(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a4, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 252(sp) |
| ; RV32ZVBC32-NEXT: vsetvli s11, zero, e64, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vand.vi v13, v9, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v14, v9, 1 |
| ; RV32ZVBC32-NEXT: vand.vi v12, v9, 4 |
| ; RV32ZVBC32-NEXT: vand.vi v11, v9, 8 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 244(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v10, v9, a0 |
| ; RV32ZVBC32-NEXT: addi s11, sp, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 236(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v15, v9, ra |
| ; RV32ZVBC32-NEXT: addi ra, sp, 272 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 228(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v16, v9, s10 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 264 |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 220(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v17, v9, s9 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 212(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v18, v9, s8 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 248 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 204(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v19, v9, s7 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 240 |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 196(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v20, v9, s1 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 11 |
| ; RV32ZVBC32-NEXT: vand.vx v21, v9, s6 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 180(sp) |
| ; RV32ZVBC32-NEXT: addi s6, sp, 224 |
| ; RV32ZVBC32-NEXT: vand.vx v22, v9, s5 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 172(sp) |
| ; RV32ZVBC32-NEXT: addi s5, sp, 216 |
| ; RV32ZVBC32-NEXT: vand.vx v23, v9, s4 |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 164(sp) |
| ; RV32ZVBC32-NEXT: addi s4, sp, 208 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v9, s3 |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 156(sp) |
| ; RV32ZVBC32-NEXT: addi s3, sp, 200 |
| ; RV32ZVBC32-NEXT: vand.vx v25, v9, s2 |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 148(sp) |
| ; RV32ZVBC32-NEXT: addi s2, sp, 192 |
| ; RV32ZVBC32-NEXT: vand.vx v26, v9, s0 |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 140(sp) |
| ; RV32ZVBC32-NEXT: addi s1, sp, 184 |
| ; RV32ZVBC32-NEXT: vand.vx v27, v9, t6 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 132(sp) |
| ; RV32ZVBC32-NEXT: addi s0, sp, 176 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v9, t4 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 124(sp) |
| ; RV32ZVBC32-NEXT: addi t6, sp, 168 |
| ; RV32ZVBC32-NEXT: vand.vx v29, v9, t3 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 116(sp) |
| ; RV32ZVBC32-NEXT: addi t4, sp, 160 |
| ; RV32ZVBC32-NEXT: vand.vx v30, v9, t2 |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: addi t3, sp, 152 |
| ; RV32ZVBC32-NEXT: vand.vx v31, v9, t1 |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: addi t2, sp, 144 |
| ; RV32ZVBC32-NEXT: vand.vx v7, v9, t0 |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: addi t1, sp, 136 |
| ; RV32ZVBC32-NEXT: vand.vx v6, v9, a7 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: addi t0, sp, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v5, v9, a6 |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 76(sp) |
| ; RV32ZVBC32-NEXT: addi a7, sp, 120 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v9, a5 |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 68(sp) |
| ; RV32ZVBC32-NEXT: addi a6, sp, 112 |
| ; RV32ZVBC32-NEXT: vand.vx v3, v9, a3 |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 60(sp) |
| ; RV32ZVBC32-NEXT: addi a5, sp, 104 |
| ; RV32ZVBC32-NEXT: vand.vx v2, v9, a4 |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 52(sp) |
| ; RV32ZVBC32-NEXT: addi a4, sp, 96 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a2 |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 44(sp) |
| ; RV32ZVBC32-NEXT: addi a3, sp, 88 |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: sw a0, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a2, sp, 80 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t5 |
| ; RV32ZVBC32-NEXT: addi a1, sp, 72 |
| ; RV32ZVBC32-NEXT: vmul.vv v13, v8, v13 |
| ; RV32ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v14, v13 |
| ; RV32ZVBC32-NEXT: vlse64.v v13, (s11), zero |
| ; RV32ZVBC32-NEXT: addi s11, sp, 64 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v14, v12 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (ra), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: mv ra, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, ra |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v12, (t5) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi ra, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v14, v11 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s10), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli s10, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, s10, t5 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s10, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v14, v10 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s9), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (t5) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi t5, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v15, v8, v15 |
| ; RV32ZVBC32-NEXT: vxor.vv v15, v14, v15 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s8), zero |
| ; RV32ZVBC32-NEXT: csrr s8, vlenb |
| ; RV32ZVBC32-NEXT: slli s9, s8, 1 |
| ; RV32ZVBC32-NEXT: add s8, s9, s8 |
| ; RV32ZVBC32-NEXT: add s8, sp, s8 |
| ; RV32ZVBC32-NEXT: addi s8, s8, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (s8) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s8, sp, 32 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v15, v16 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s7), zero |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (s7) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s7, sp, 24 |
| ; RV32ZVBC32-NEXT: vmul.vv v17, v8, v17 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vmul.vv v19, v8, v19 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vmul.vv v21, v8, v21 |
| ; RV32ZVBC32-NEXT: vmul.vv v22, v8, v22 |
| ; RV32ZVBC32-NEXT: vmul.vv v23, v8, v23 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v25, v8, v25 |
| ; RV32ZVBC32-NEXT: vmul.vv v26, v8, v26 |
| ; RV32ZVBC32-NEXT: vmul.vv v27, v8, v27 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vmul.vv v29, v8, v29 |
| ; RV32ZVBC32-NEXT: vmul.vv v30, v8, v30 |
| ; RV32ZVBC32-NEXT: vmul.vv v31, v8, v31 |
| ; RV32ZVBC32-NEXT: vmul.vv v7, v8, v7 |
| ; RV32ZVBC32-NEXT: vmul.vv v6, v8, v6 |
| ; RV32ZVBC32-NEXT: vmul.vv v5, v8, v5 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vmul.vv v3, v8, v3 |
| ; RV32ZVBC32-NEXT: vmul.vv v2, v8, v2 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v17 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s9), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s6), zero |
| ; RV32ZVBC32-NEXT: csrr s6, vlenb |
| ; RV32ZVBC32-NEXT: add s6, sp, s6 |
| ; RV32ZVBC32-NEXT: addi s6, s6, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (s6) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v19 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s5), zero |
| ; RV32ZVBC32-NEXT: addi s5, sp, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (s5) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s4), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v0 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s3), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v21 |
| ; RV32ZVBC32-NEXT: vlse64.v v21, (s2), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v22 |
| ; RV32ZVBC32-NEXT: vlse64.v v22, (s1), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v23 |
| ; RV32ZVBC32-NEXT: vlse64.v v23, (s0), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t6), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v25 |
| ; RV32ZVBC32-NEXT: vlse64.v v25, (t4), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v26 |
| ; RV32ZVBC32-NEXT: vlse64.v v26, (t3), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v27 |
| ; RV32ZVBC32-NEXT: vlse64.v v27, (t2), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v28 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (t1), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v29 |
| ; RV32ZVBC32-NEXT: vlse64.v v29, (t0), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v30 |
| ; RV32ZVBC32-NEXT: vlse64.v v30, (a7), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v31 |
| ; RV32ZVBC32-NEXT: vlse64.v v31, (a6), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v7 |
| ; RV32ZVBC32-NEXT: vlse64.v v7, (a5), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v6 |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (a4), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v5 |
| ; RV32ZVBC32-NEXT: vlse64.v v5, (a3), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v4 |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a2), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v3 |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (a1), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v2 |
| ; RV32ZVBC32-NEXT: vlse64.v v2, (s11), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v16, v1 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (ra), zero |
| ; RV32ZVBC32-NEXT: vand.vv v13, v9, v13 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v14, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v14, v9, v14 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a2, a1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v15, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v15, v9, v15 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v16, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v16, v9, v16 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a2, a1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v17, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v17, v9, v17 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v18, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v18, v9, v18 |
| ; RV32ZVBC32-NEXT: vand.vv v19, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v20, v9, v11 |
| ; RV32ZVBC32-NEXT: addi a1, sp, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a2, a1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v0, v9, v0 |
| ; RV32ZVBC32-NEXT: vand.vv v21, v9, v21 |
| ; RV32ZVBC32-NEXT: vand.vv v22, v9, v22 |
| ; RV32ZVBC32-NEXT: vand.vv v23, v9, v23 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v9, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v25, v9, v25 |
| ; RV32ZVBC32-NEXT: vand.vv v26, v9, v26 |
| ; RV32ZVBC32-NEXT: vand.vv v27, v9, v27 |
| ; RV32ZVBC32-NEXT: vand.vv v28, v9, v28 |
| ; RV32ZVBC32-NEXT: vand.vv v29, v9, v29 |
| ; RV32ZVBC32-NEXT: vand.vv v30, v9, v30 |
| ; RV32ZVBC32-NEXT: vand.vv v31, v9, v31 |
| ; RV32ZVBC32-NEXT: vand.vv v7, v9, v7 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v9, v6 |
| ; RV32ZVBC32-NEXT: vand.vv v5, v9, v5 |
| ; RV32ZVBC32-NEXT: vand.vv v4, v9, v4 |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v3 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v2, v9, v2 |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s10), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (t5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s7), zero |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: vand.vv v3, v9, v3 |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a2, a1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v9, v12 |
| ; RV32ZVBC32-NEXT: vand.vx v9, v9, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v9, v8, v9 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v1, v9 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v13 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v15 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v17 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v19 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v21 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v23 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v25 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v27 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v29 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v31 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v7 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v5 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v2 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v11 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v3 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v9, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 3 |
| ; RV32ZVBC32-NEXT: sub a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i64_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e64, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vand.vi v10, v9, 2 |
| ; RV64ZVBC32-NEXT: vand.vi v11, v9, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v11, v10 |
| ; RV64ZVBC32-NEXT: vand.vi v11, v9, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vi v11, v9, 8 |
| ; RV64ZVBC32-NEXT: li a0, 16 |
| ; RV64ZVBC32-NEXT: li a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: li a0, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: li a0, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: li a2, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a0, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a2 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 11 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2048 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4096 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8192 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16384 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32768 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 65536 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 131072 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 262144 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 31 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 33 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 34 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 35 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 36 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 37 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 38 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 39 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 40 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 41 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 42 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 43 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 44 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 45 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 46 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 47 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 48 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 49 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 50 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 51 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 52 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 53 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 54 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 55 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 56 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 57 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 58 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 59 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 60 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 61 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a1, -1 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 62 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 63 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: vand.vx v9, v9, a1 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vmul.vv v8, v8, v9 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v10, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 1 x i64> @llvm.clmul.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) |
| ret <vscale x 1 x i64> %v |
| } |
| |
| define <vscale x 1 x i64> @clmul_nxv1i64_vx(<vscale x 1 x i64> %va, i64 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv1i64_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a3, a2, 2 |
| ; RV32V-NEXT: add a2, a3, a2 |
| ; RV32V-NEXT: sub sp, sp, a2 |
| ; RV32V-NEXT: sw a0, 8(sp) |
| ; RV32V-NEXT: sw a1, 12(sp) |
| ; RV32V-NEXT: addi s4, sp, 8 |
| ; RV32V-NEXT: lui s2, 524288 |
| ; RV32V-NEXT: li s11, 1 |
| ; RV32V-NEXT: li s6, 2 |
| ; RV32V-NEXT: li s8, 4 |
| ; RV32V-NEXT: li s10, 8 |
| ; RV32V-NEXT: li ra, 64 |
| ; RV32V-NEXT: li s9, 128 |
| ; RV32V-NEXT: li s7, 256 |
| ; RV32V-NEXT: li s5, 512 |
| ; RV32V-NEXT: li s3, 1024 |
| ; RV32V-NEXT: lui s1, 1 |
| ; RV32V-NEXT: lui s0, 2 |
| ; RV32V-NEXT: lui t6, 4 |
| ; RV32V-NEXT: lui t5, 8 |
| ; RV32V-NEXT: lui t4, 16 |
| ; RV32V-NEXT: lui t3, 32 |
| ; RV32V-NEXT: lui t2, 64 |
| ; RV32V-NEXT: lui t1, 128 |
| ; RV32V-NEXT: lui t0, 256 |
| ; RV32V-NEXT: lui a6, 512 |
| ; RV32V-NEXT: lui a5, 1024 |
| ; RV32V-NEXT: lui a4, 2048 |
| ; RV32V-NEXT: lui a3, 4096 |
| ; RV32V-NEXT: lui a2, 8192 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vsetvli a1, zero, e64, m1, ta, ma |
| ; RV32V-NEXT: vlse64.v v9, (s4), zero |
| ; RV32V-NEXT: lui s4, 32768 |
| ; RV32V-NEXT: sw s2, 16(sp) |
| ; RV32V-NEXT: lui a7, 524288 |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s11, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw s6, 268(sp) |
| ; RV32V-NEXT: lui s6, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw s8, 260(sp) |
| ; RV32V-NEXT: lui s8, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s10, 252(sp) |
| ; RV32V-NEXT: lui a1, 262144 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: li s2, 16 |
| ; RV32V-NEXT: sw s2, 244(sp) |
| ; RV32V-NEXT: li s10, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: li s2, 32 |
| ; RV32V-NEXT: sw s2, 236(sp) |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw ra, 228(sp) |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s9, 220(sp) |
| ; RV32V-NEXT: li s2, 128 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s7, 212(sp) |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s5, 204(sp) |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s3, 196(sp) |
| ; RV32V-NEXT: slli s11, s11, 11 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw s11, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s1, 180(sp) |
| ; RV32V-NEXT: lui s3, 1 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s0, 172(sp) |
| ; RV32V-NEXT: lui s1, 2 |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw t6, 164(sp) |
| ; RV32V-NEXT: lui s0, 4 |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw t5, 156(sp) |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw t4, 148(sp) |
| ; RV32V-NEXT: lui t6, 16 |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw t3, 140(sp) |
| ; RV32V-NEXT: lui t4, 32 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t2, 132(sp) |
| ; RV32V-NEXT: lui t3, 64 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t1, 124(sp) |
| ; RV32V-NEXT: lui t2, 128 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t0, 116(sp) |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw a6, 108(sp) |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw a5, 100(sp) |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw a4, 92(sp) |
| ; RV32V-NEXT: lui t1, 2048 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a3, 84(sp) |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a2, 76(sp) |
| ; RV32V-NEXT: lui a4, 8192 |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a0, 68(sp) |
| ; RV32V-NEXT: lui a2, 16384 |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw s4, 60(sp) |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw s6, 52(sp) |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw s8, 44(sp) |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: sw a1, 36(sp) |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw a7, 28(sp) |
| ; RV32V-NEXT: addi a1, sp, 16 |
| ; RV32V-NEXT: vlse64.v v11, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 272 |
| ; RV32V-NEXT: vlse64.v v10, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 264 |
| ; RV32V-NEXT: vlse64.v v13, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 256 |
| ; RV32V-NEXT: vlse64.v v14, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 248 |
| ; RV32V-NEXT: vlse64.v v15, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 240 |
| ; RV32V-NEXT: vlse64.v v16, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 232 |
| ; RV32V-NEXT: vlse64.v v17, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 224 |
| ; RV32V-NEXT: vlse64.v v18, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 216 |
| ; RV32V-NEXT: vlse64.v v19, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 208 |
| ; RV32V-NEXT: vlse64.v v20, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 200 |
| ; RV32V-NEXT: vlse64.v v21, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 192 |
| ; RV32V-NEXT: vlse64.v v22, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 184 |
| ; RV32V-NEXT: vlse64.v v23, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 176 |
| ; RV32V-NEXT: vlse64.v v24, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 168 |
| ; RV32V-NEXT: vlse64.v v25, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 160 |
| ; RV32V-NEXT: vlse64.v v26, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 152 |
| ; RV32V-NEXT: vlse64.v v27, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 144 |
| ; RV32V-NEXT: vlse64.v v28, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 136 |
| ; RV32V-NEXT: vlse64.v v29, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 128 |
| ; RV32V-NEXT: vlse64.v v30, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 120 |
| ; RV32V-NEXT: vlse64.v v31, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 112 |
| ; RV32V-NEXT: vlse64.v v7, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 104 |
| ; RV32V-NEXT: vlse64.v v6, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 96 |
| ; RV32V-NEXT: vlse64.v v5, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 88 |
| ; RV32V-NEXT: vlse64.v v3, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 80 |
| ; RV32V-NEXT: vlse64.v v12, (a1), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v12, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi a1, sp, 72 |
| ; RV32V-NEXT: vlse64.v v4, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 64 |
| ; RV32V-NEXT: vlse64.v v2, (a1), zero |
| ; RV32V-NEXT: addi ra, sp, 56 |
| ; RV32V-NEXT: vand.vi v1, v9, 2 |
| ; RV32V-NEXT: vand.vi v0, v9, 1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v0, v1 |
| ; RV32V-NEXT: vand.vi v0, v9, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vi v0, v9, 8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s10 |
| ; RV32V-NEXT: addi s10, sp, 48 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: li a1, 32 |
| ; RV32V-NEXT: vand.vx v0, v9, a1 |
| ; RV32V-NEXT: addi s9, sp, 40 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: li a1, 64 |
| ; RV32V-NEXT: vand.vx v0, v9, a1 |
| ; RV32V-NEXT: addi s7, sp, 32 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s2 |
| ; RV32V-NEXT: addi s5, sp, 24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: li a1, 256 |
| ; RV32V-NEXT: vand.vx v0, v9, a1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: li a1, 512 |
| ; RV32V-NEXT: vand.vx v0, v9, a1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: li a1, 1024 |
| ; RV32V-NEXT: vand.vx v0, v9, a1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s11 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s3 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t5 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t6 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t3 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t2 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, a6 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, a5 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, t1 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, a3 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, a4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, a2 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s6 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vand.vx v0, v9, s8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v1, v1, v0 |
| ; RV32V-NEXT: vlse64.v v0, (ra), zero |
| ; RV32V-NEXT: vand.vv v11, v9, v11 |
| ; RV32V-NEXT: vand.vv v12, v9, v10 |
| ; RV32V-NEXT: vand.vv v13, v9, v13 |
| ; RV32V-NEXT: vand.vv v14, v9, v14 |
| ; RV32V-NEXT: vand.vv v15, v9, v15 |
| ; RV32V-NEXT: vand.vv v16, v9, v16 |
| ; RV32V-NEXT: vand.vv v17, v9, v17 |
| ; RV32V-NEXT: vand.vv v18, v9, v18 |
| ; RV32V-NEXT: vand.vv v19, v9, v19 |
| ; RV32V-NEXT: vand.vv v20, v9, v20 |
| ; RV32V-NEXT: vand.vv v21, v9, v21 |
| ; RV32V-NEXT: vand.vv v22, v9, v22 |
| ; RV32V-NEXT: vand.vv v23, v9, v23 |
| ; RV32V-NEXT: vand.vv v24, v9, v24 |
| ; RV32V-NEXT: vand.vv v25, v9, v25 |
| ; RV32V-NEXT: vand.vv v26, v9, v26 |
| ; RV32V-NEXT: vand.vv v27, v9, v27 |
| ; RV32V-NEXT: vand.vv v28, v9, v28 |
| ; RV32V-NEXT: vand.vv v29, v9, v29 |
| ; RV32V-NEXT: vand.vv v30, v9, v30 |
| ; RV32V-NEXT: vand.vv v31, v9, v31 |
| ; RV32V-NEXT: vand.vv v7, v9, v7 |
| ; RV32V-NEXT: vand.vv v6, v9, v6 |
| ; RV32V-NEXT: vand.vv v5, v9, v5 |
| ; RV32V-NEXT: vand.vv v3, v9, v3 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a1) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: addi a1, sp, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v10, v9, v4 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v10, v9, v2 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v10, v9, v0 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v4, (s10), zero |
| ; RV32V-NEXT: vlse64.v v2, (s9), zero |
| ; RV32V-NEXT: vlse64.v v0, (s7), zero |
| ; RV32V-NEXT: vlse64.v v10, (s5), zero |
| ; RV32V-NEXT: vand.vv v4, v9, v4 |
| ; RV32V-NEXT: vand.vv v2, v9, v2 |
| ; RV32V-NEXT: vand.vv v0, v9, v0 |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a2, a1, 1 |
| ; RV32V-NEXT: add a1, a2, a1 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vx v9, v9, a0 |
| ; RV32V-NEXT: vmul.vv v9, v8, v9 |
| ; RV32V-NEXT: vxor.vv v9, v1, v9 |
| ; RV32V-NEXT: vmul.vv v10, v8, v11 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v12 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v13 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v14 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v15 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v16 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v17 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v18 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v19 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v20 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v21 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v22 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v23 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v24 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v25 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v26 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v27 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v28 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v29 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v30 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v31 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v7 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v2 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 1 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v8, v8, v10 |
| ; RV32V-NEXT: vxor.vv v8, v9, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 2 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i64_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: andi a1, a0, 2 |
| ; RV64V-NEXT: andi a2, a0, 1 |
| ; RV64V-NEXT: vsetvli a3, zero, e64, m1, ta, ma |
| ; RV64V-NEXT: vmul.vx v9, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 4 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 8 |
| ; RV64V-NEXT: vxor.vv v9, v10, v9 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 16 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 32 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 64 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 128 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 256 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 512 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a3, a0, 1024 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: li a1, 1 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a3 |
| ; RV64V-NEXT: slli a2, a1, 11 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 1 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 2 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 4 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 8 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 16 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 64 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 128 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 256 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 512 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 1024 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 2048 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 4096 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 8192 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 16384 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 32768 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 65536 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 131072 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 262144 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: srliw a2, a0, 31 |
| ; RV64V-NEXT: slli a2, a2, 31 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 33 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 34 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 35 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 36 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 37 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 38 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 39 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 40 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 41 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 42 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 43 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 44 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 45 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 46 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 47 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 48 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 49 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 50 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 51 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 52 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 53 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 54 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 55 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 56 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 57 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 58 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 59 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 60 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 61 |
| ; RV64V-NEXT: slli a1, a1, 62 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: and a1, a0, a1 |
| ; RV64V-NEXT: srli a0, a0, 63 |
| ; RV64V-NEXT: slli a0, a0, 63 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v8, v8, a0 |
| ; RV64V-NEXT: vxor.vv v8, v9, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i64_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: addi sp, sp, -16 |
| ; RV32ZVBC64-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC64-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC64-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e64, m1, ta, ma |
| ; RV32ZVBC64-NEXT: vlse64.v v9, (a0), zero |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v9 |
| ; RV32ZVBC64-NEXT: addi sp, sp, 16 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i64_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e64, m1, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i64_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a3, a2, 2 |
| ; RV32ZVBC32-NEXT: add a2, a3, a2 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a2 |
| ; RV32ZVBC32-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC32-NEXT: addi s4, sp, 8 |
| ; RV32ZVBC32-NEXT: lui s2, 524288 |
| ; RV32ZVBC32-NEXT: li s11, 1 |
| ; RV32ZVBC32-NEXT: li s6, 2 |
| ; RV32ZVBC32-NEXT: li s8, 4 |
| ; RV32ZVBC32-NEXT: li s10, 8 |
| ; RV32ZVBC32-NEXT: li ra, 64 |
| ; RV32ZVBC32-NEXT: li s9, 128 |
| ; RV32ZVBC32-NEXT: li s7, 256 |
| ; RV32ZVBC32-NEXT: li s5, 512 |
| ; RV32ZVBC32-NEXT: li s3, 1024 |
| ; RV32ZVBC32-NEXT: lui s1, 1 |
| ; RV32ZVBC32-NEXT: lui s0, 2 |
| ; RV32ZVBC32-NEXT: lui t6, 4 |
| ; RV32ZVBC32-NEXT: lui t5, 8 |
| ; RV32ZVBC32-NEXT: lui t4, 16 |
| ; RV32ZVBC32-NEXT: lui t3, 32 |
| ; RV32ZVBC32-NEXT: lui t2, 64 |
| ; RV32ZVBC32-NEXT: lui t1, 128 |
| ; RV32ZVBC32-NEXT: lui t0, 256 |
| ; RV32ZVBC32-NEXT: lui a6, 512 |
| ; RV32ZVBC32-NEXT: lui a5, 1024 |
| ; RV32ZVBC32-NEXT: lui a4, 2048 |
| ; RV32ZVBC32-NEXT: lui a3, 4096 |
| ; RV32ZVBC32-NEXT: lui a2, 8192 |
| ; RV32ZVBC32-NEXT: lui a0, 16384 |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e64, m1, ta, ma |
| ; RV32ZVBC32-NEXT: vlse64.v v9, (s4), zero |
| ; RV32ZVBC32-NEXT: lui s4, 32768 |
| ; RV32ZVBC32-NEXT: sw s2, 16(sp) |
| ; RV32ZVBC32-NEXT: lui a7, 524288 |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 268(sp) |
| ; RV32ZVBC32-NEXT: lui s6, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 260(sp) |
| ; RV32ZVBC32-NEXT: lui s8, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 252(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: li s2, 16 |
| ; RV32ZVBC32-NEXT: sw s2, 244(sp) |
| ; RV32ZVBC32-NEXT: li s10, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: li s2, 32 |
| ; RV32ZVBC32-NEXT: sw s2, 236(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 228(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 220(sp) |
| ; RV32ZVBC32-NEXT: li s2, 128 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 212(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 204(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 196(sp) |
| ; RV32ZVBC32-NEXT: slli s11, s11, 11 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 180(sp) |
| ; RV32ZVBC32-NEXT: lui s3, 1 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 172(sp) |
| ; RV32ZVBC32-NEXT: lui s1, 2 |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 164(sp) |
| ; RV32ZVBC32-NEXT: lui s0, 4 |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 156(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 148(sp) |
| ; RV32ZVBC32-NEXT: lui t6, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 140(sp) |
| ; RV32ZVBC32-NEXT: lui t4, 32 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 132(sp) |
| ; RV32ZVBC32-NEXT: lui t3, 64 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 124(sp) |
| ; RV32ZVBC32-NEXT: lui t2, 128 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 116(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 108(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 100(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 92(sp) |
| ; RV32ZVBC32-NEXT: lui t1, 2048 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 84(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 76(sp) |
| ; RV32ZVBC32-NEXT: lui a4, 8192 |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 68(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 16384 |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 60(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 52(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 44(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 36(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a1, sp, 16 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 272 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 264 |
| ; RV32ZVBC32-NEXT: vlse64.v v13, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 256 |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 248 |
| ; RV32ZVBC32-NEXT: vlse64.v v15, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v17, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 224 |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 216 |
| ; RV32ZVBC32-NEXT: vlse64.v v19, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 208 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 200 |
| ; RV32ZVBC32-NEXT: vlse64.v v21, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 192 |
| ; RV32ZVBC32-NEXT: vlse64.v v22, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 184 |
| ; RV32ZVBC32-NEXT: vlse64.v v23, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 176 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 168 |
| ; RV32ZVBC32-NEXT: vlse64.v v25, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 160 |
| ; RV32ZVBC32-NEXT: vlse64.v v26, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 152 |
| ; RV32ZVBC32-NEXT: vlse64.v v27, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 144 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 136 |
| ; RV32ZVBC32-NEXT: vlse64.v v29, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 128 |
| ; RV32ZVBC32-NEXT: vlse64.v v30, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 120 |
| ; RV32ZVBC32-NEXT: vlse64.v v31, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 112 |
| ; RV32ZVBC32-NEXT: vlse64.v v7, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 104 |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 96 |
| ; RV32ZVBC32-NEXT: vlse64.v v5, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 88 |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 80 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v12, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a1, sp, 72 |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 64 |
| ; RV32ZVBC32-NEXT: vlse64.v v2, (a1), zero |
| ; RV32ZVBC32-NEXT: addi ra, sp, 56 |
| ; RV32ZVBC32-NEXT: vand.vi v1, v9, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v9, 1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v0, v1 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v9, 4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v9, 8 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s10 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: li a1, 32 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a1 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: li a1, 64 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a1 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 32 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s2 |
| ; RV32ZVBC32-NEXT: addi s5, sp, 24 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: li a1, 256 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: li a1, 512 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: li a1, 1024 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s11 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s3 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s0 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t5 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t6 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t3 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t2 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t0 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a6 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a5 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, t1 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a3 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a2 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s6 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, s8 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v1, v1, v0 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (ra), zero |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v11 |
| ; RV32ZVBC32-NEXT: vand.vv v12, v9, v10 |
| ; RV32ZVBC32-NEXT: vand.vv v13, v9, v13 |
| ; RV32ZVBC32-NEXT: vand.vv v14, v9, v14 |
| ; RV32ZVBC32-NEXT: vand.vv v15, v9, v15 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v9, v16 |
| ; RV32ZVBC32-NEXT: vand.vv v17, v9, v17 |
| ; RV32ZVBC32-NEXT: vand.vv v18, v9, v18 |
| ; RV32ZVBC32-NEXT: vand.vv v19, v9, v19 |
| ; RV32ZVBC32-NEXT: vand.vv v20, v9, v20 |
| ; RV32ZVBC32-NEXT: vand.vv v21, v9, v21 |
| ; RV32ZVBC32-NEXT: vand.vv v22, v9, v22 |
| ; RV32ZVBC32-NEXT: vand.vv v23, v9, v23 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v9, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v25, v9, v25 |
| ; RV32ZVBC32-NEXT: vand.vv v26, v9, v26 |
| ; RV32ZVBC32-NEXT: vand.vv v27, v9, v27 |
| ; RV32ZVBC32-NEXT: vand.vv v28, v9, v28 |
| ; RV32ZVBC32-NEXT: vand.vv v29, v9, v29 |
| ; RV32ZVBC32-NEXT: vand.vv v30, v9, v30 |
| ; RV32ZVBC32-NEXT: vand.vv v31, v9, v31 |
| ; RV32ZVBC32-NEXT: vand.vv v7, v9, v7 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v9, v6 |
| ; RV32ZVBC32-NEXT: vand.vv v5, v9, v5 |
| ; RV32ZVBC32-NEXT: vand.vv v3, v9, v3 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a1) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: addi a1, sp, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v4 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v2 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v0 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s10), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v2, (s9), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s7), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s5), zero |
| ; RV32ZVBC32-NEXT: vand.vv v4, v9, v4 |
| ; RV32ZVBC32-NEXT: vand.vv v2, v9, v2 |
| ; RV32ZVBC32-NEXT: vand.vv v0, v9, v0 |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a2, a1 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vx v9, v9, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v9, v8, v9 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v1, v9 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v13 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v15 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v17 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v19 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v21 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v23 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v25 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v27 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v29 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v31 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v7 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v5 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v3 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v2 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v9, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i64_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: andi a1, a0, 2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 1 |
| ; RV64ZVBC32-NEXT: vsetvli a3, zero, e64, m1, ta, ma |
| ; RV64ZVBC32-NEXT: vmul.vx v9, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 4 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 8 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v10, v9 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 16 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 32 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 64 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 128 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 256 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 512 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a3, a0, 1024 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: li a1, 1 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a3 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 11 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 64 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 128 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 256 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 512 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1024 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2048 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4096 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8192 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16384 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32768 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 65536 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 131072 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 262144 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: srliw a2, a0, 31 |
| ; RV64ZVBC32-NEXT: slli a2, a2, 31 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 33 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 34 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 35 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 36 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 37 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 38 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 39 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 40 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 41 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 42 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 43 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 44 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 45 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 46 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 47 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 48 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 49 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 50 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 51 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 52 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 53 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 54 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 55 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 56 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 57 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 58 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 59 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 60 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 61 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 62 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: and a1, a0, a1 |
| ; RV64ZVBC32-NEXT: srli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v9, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 |
| %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer |
| %v = call <vscale x 1 x i64> @llvm.clmul.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) |
| ret <vscale x 1 x i64> %v |
| } |
| |
| define <vscale x 2 x i64> @clmul_nxv2i64_vv(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i64_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: sub sp, sp, a0 |
| ; RV32V-NEXT: lui a1, 524288 |
| ; RV32V-NEXT: li s2, 1 |
| ; RV32V-NEXT: li a3, 2 |
| ; RV32V-NEXT: li a2, 4 |
| ; RV32V-NEXT: li s7, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: li s6, 32 |
| ; RV32V-NEXT: li s5, 64 |
| ; RV32V-NEXT: li s3, 128 |
| ; RV32V-NEXT: li s1, 256 |
| ; RV32V-NEXT: li s0, 512 |
| ; RV32V-NEXT: li t5, 1024 |
| ; RV32V-NEXT: lui ra, 1 |
| ; RV32V-NEXT: lui s8, 2 |
| ; RV32V-NEXT: lui s10, 4 |
| ; RV32V-NEXT: lui s11, 8 |
| ; RV32V-NEXT: lui s9, 16 |
| ; RV32V-NEXT: lui s4, 32 |
| ; RV32V-NEXT: lui t6, 64 |
| ; RV32V-NEXT: lui t4, 128 |
| ; RV32V-NEXT: lui t3, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a6, 8192 |
| ; RV32V-NEXT: lui a5, 16384 |
| ; RV32V-NEXT: lui a4, 32768 |
| ; RV32V-NEXT: sw a1, 16(sp) |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s2, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a3, 268(sp) |
| ; RV32V-NEXT: lui a3, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw a2, 260(sp) |
| ; RV32V-NEXT: lui a2, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s7, 252(sp) |
| ; RV32V-NEXT: vsetvli s7, zero, e64, m2, ta, ma |
| ; RV32V-NEXT: vand.vi v24, v10, 2 |
| ; RV32V-NEXT: vand.vi v20, v10, 1 |
| ; RV32V-NEXT: vand.vi v26, v10, 4 |
| ; RV32V-NEXT: vand.vi v14, v10, 8 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: sw a0, 244(sp) |
| ; RV32V-NEXT: vand.vx v12, v10, a0 |
| ; RV32V-NEXT: addi s7, sp, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: sw s6, 236(sp) |
| ; RV32V-NEXT: vand.vx v16, v10, s6 |
| ; RV32V-NEXT: addi s6, sp, 272 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw s5, 228(sp) |
| ; RV32V-NEXT: vand.vx v18, v10, s5 |
| ; RV32V-NEXT: addi s5, sp, 264 |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s3, 220(sp) |
| ; RV32V-NEXT: vand.vx v0, v10, s3 |
| ; RV32V-NEXT: addi s3, sp, 256 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s1, 212(sp) |
| ; RV32V-NEXT: vand.vx v6, v10, s1 |
| ; RV32V-NEXT: addi s1, sp, 248 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s0, 204(sp) |
| ; RV32V-NEXT: vand.vx v4, v10, s0 |
| ; RV32V-NEXT: addi s0, sp, 240 |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw t5, 196(sp) |
| ; RV32V-NEXT: vand.vx v2, v10, t5 |
| ; RV32V-NEXT: slli s2, s2, 11 |
| ; RV32V-NEXT: vand.vx v28, v10, ra |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw s2, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw ra, 180(sp) |
| ; RV32V-NEXT: addi t5, sp, 224 |
| ; RV32V-NEXT: vand.vx v30, v10, s8 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s8, 172(sp) |
| ; RV32V-NEXT: addi s8, sp, 216 |
| ; RV32V-NEXT: vand.vx v22, v10, s10 |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s10, 164(sp) |
| ; RV32V-NEXT: addi s10, sp, 208 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v20, v20, v24 |
| ; RV32V-NEXT: vand.vx v24, v10, s11 |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s11, 156(sp) |
| ; RV32V-NEXT: addi s11, sp, 200 |
| ; RV32V-NEXT: vmul.vv v26, v8, v26 |
| ; RV32V-NEXT: vxor.vv v20, v20, v26 |
| ; RV32V-NEXT: vand.vx v26, v10, s9 |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s9, 148(sp) |
| ; RV32V-NEXT: addi s9, sp, 192 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v14, v20, v14 |
| ; RV32V-NEXT: vand.vx v20, v10, s4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv ra, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, ra |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v20, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw s4, 140(sp) |
| ; RV32V-NEXT: addi s4, sp, 184 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v12, v14, v12 |
| ; RV32V-NEXT: vand.vx v14, v10, t6 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t6, 132(sp) |
| ; RV32V-NEXT: addi t6, sp, 176 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vand.vx v16, v10, t4 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t4, 124(sp) |
| ; RV32V-NEXT: addi t4, sp, 168 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v18, v12, v18 |
| ; RV32V-NEXT: vand.vx v12, v10, t3 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t3, 116(sp) |
| ; RV32V-NEXT: addi t3, sp, 160 |
| ; RV32V-NEXT: vmul.vv v20, v8, v0 |
| ; RV32V-NEXT: vxor.vv v18, v18, v20 |
| ; RV32V-NEXT: vand.vx v20, v10, t2 |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: addi t2, sp, 152 |
| ; RV32V-NEXT: vmul.vv v6, v8, v6 |
| ; RV32V-NEXT: vxor.vv v18, v18, v6 |
| ; RV32V-NEXT: vand.vx v6, v10, t1 |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: addi t1, sp, 144 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vxor.vv v18, v18, v4 |
| ; RV32V-NEXT: vand.vx v4, v10, t0 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv ra, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add ra, ra, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, ra |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: addi t0, sp, 136 |
| ; RV32V-NEXT: vmul.vv v2, v8, v2 |
| ; RV32V-NEXT: vxor.vv v18, v18, v2 |
| ; RV32V-NEXT: vand.vx v2, v10, s2 |
| ; RV32V-NEXT: addi ra, sp, 128 |
| ; RV32V-NEXT: vmul.vv v2, v8, v2 |
| ; RV32V-NEXT: vxor.vv v18, v18, v2 |
| ; RV32V-NEXT: vand.vx v2, v10, a7 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: addi a7, sp, 120 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vxor.vv v18, v18, v28 |
| ; RV32V-NEXT: vand.vx v4, v10, a6 |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a6, 76(sp) |
| ; RV32V-NEXT: addi a6, sp, 112 |
| ; RV32V-NEXT: vmul.vv v30, v8, v30 |
| ; RV32V-NEXT: vxor.vv v18, v18, v30 |
| ; RV32V-NEXT: vand.vx v30, v10, a5 |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a5, 68(sp) |
| ; RV32V-NEXT: addi a5, sp, 104 |
| ; RV32V-NEXT: vmul.vv v22, v8, v22 |
| ; RV32V-NEXT: vxor.vv v18, v18, v22 |
| ; RV32V-NEXT: vand.vx v28, v10, a4 |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a4, 60(sp) |
| ; RV32V-NEXT: addi a4, sp, 96 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v18, v18, v24 |
| ; RV32V-NEXT: vand.vx v24, v10, a3 |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a3, 52(sp) |
| ; RV32V-NEXT: addi a3, sp, 88 |
| ; RV32V-NEXT: vmul.vv v26, v8, v26 |
| ; RV32V-NEXT: vxor.vv v18, v18, v26 |
| ; RV32V-NEXT: vand.vx v26, v10, a2 |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a2, 44(sp) |
| ; RV32V-NEXT: addi a2, sp, 80 |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: sw a0, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw a1, 28(sp) |
| ; RV32V-NEXT: addi a1, sp, 72 |
| ; RV32V-NEXT: sw a6, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: mv s2, a6 |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: add a6, a6, s2 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vl2r.v v22, (a6) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v0, v8, v22 |
| ; RV32V-NEXT: vxor.vv v0, v18, v0 |
| ; RV32V-NEXT: vlse64.v v18, (s7), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: mv s2, a6 |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: add a6, a6, s2 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi s7, sp, 64 |
| ; RV32V-NEXT: vmul.vv v14, v8, v14 |
| ; RV32V-NEXT: vxor.vv v14, v0, v14 |
| ; RV32V-NEXT: vlse64.v v18, (s6), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: mv s2, a6 |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: add a6, a6, s2 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi s2, sp, 56 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v14, v14, v16 |
| ; RV32V-NEXT: vlse64.v v16, (s5), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: mv s5, a6 |
| ; RV32V-NEXT: slli a6, a6, 4 |
| ; RV32V-NEXT: add a6, a6, s5 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs2r.v v16, (a6) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi s5, sp, 48 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v12, v14, v12 |
| ; RV32V-NEXT: vlse64.v v14, (s3), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 5 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs2r.v v14, (a6) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi s3, sp, 40 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v20, v12, v20 |
| ; RV32V-NEXT: vlse64.v v12, (s1), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: mv s1, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add s1, s1, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add s1, s1, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a6, a6, s1 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi s1, sp, 32 |
| ; RV32V-NEXT: vmul.vv v6, v8, v6 |
| ; RV32V-NEXT: vxor.vv v20, v20, v6 |
| ; RV32V-NEXT: vlse64.v v12, (s0), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: mv s0, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add s0, s0, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a6, a6, s0 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi s0, sp, 24 |
| ; RV32V-NEXT: csrr s6, vlenb |
| ; RV32V-NEXT: slli s6, s6, 1 |
| ; RV32V-NEXT: mv a6, s6 |
| ; RV32V-NEXT: slli s6, s6, 1 |
| ; RV32V-NEXT: add a6, a6, s6 |
| ; RV32V-NEXT: slli s6, s6, 3 |
| ; RV32V-NEXT: add s6, s6, a6 |
| ; RV32V-NEXT: lw a6, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: add s6, sp, s6 |
| ; RV32V-NEXT: addi s6, s6, 288 |
| ; RV32V-NEXT: vl2r.v v12, (s6) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v6, v8, v12 |
| ; RV32V-NEXT: vmul.vv v2, v8, v2 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vmul.vv v30, v8, v30 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v26, v8, v26 |
| ; RV32V-NEXT: vxor.vv v20, v20, v6 |
| ; RV32V-NEXT: addi s6, sp, 232 |
| ; RV32V-NEXT: vlse64.v v0, (s6), zero |
| ; RV32V-NEXT: vxor.vv v20, v20, v2 |
| ; RV32V-NEXT: vlse64.v v6, (t5), zero |
| ; RV32V-NEXT: vxor.vv v20, v20, v4 |
| ; RV32V-NEXT: vlse64.v v22, (s8), zero |
| ; RV32V-NEXT: vxor.vv v20, v20, v30 |
| ; RV32V-NEXT: vlse64.v v18, (s10), zero |
| ; RV32V-NEXT: vxor.vv v20, v20, v28 |
| ; RV32V-NEXT: vlse64.v v16, (s11), zero |
| ; RV32V-NEXT: vxor.vv v20, v20, v24 |
| ; RV32V-NEXT: vlse64.v v14, (s9), zero |
| ; RV32V-NEXT: vxor.vv v2, v20, v26 |
| ; RV32V-NEXT: vlse64.v v12, (s4), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v26, v10, v20 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v4, v10, v20 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 4 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v30, v10, v20 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 5 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v20, v10, v20 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s4, s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s4, s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v28, v10, v24 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s4, s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v10, v24 |
| ; RV32V-NEXT: vand.vv v0, v10, v0 |
| ; RV32V-NEXT: vand.vv v6, v10, v6 |
| ; RV32V-NEXT: vand.vv v22, v10, v22 |
| ; RV32V-NEXT: vand.vv v18, v10, v18 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v10, v16 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs2r.v v16, (t5) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v14, v10, v14 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s4, s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s4, s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs2r.v v14, (t5) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s4, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, s4 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs2r.v v12, (t5) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (t6), zero |
| ; RV32V-NEXT: vlse64.v v14, (t4), zero |
| ; RV32V-NEXT: vlse64.v v16, (t3), zero |
| ; RV32V-NEXT: vlse64.v v18, (t2), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: mv t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, t3 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: mv t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: add t2, t2, t3 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v16 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: mv t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t3, t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, t3 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v18 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: mv t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t3, t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: add t2, t2, t3 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (t1), zero |
| ; RV32V-NEXT: vlse64.v v14, (t0), zero |
| ; RV32V-NEXT: vlse64.v v16, (ra), zero |
| ; RV32V-NEXT: vlse64.v v18, (a7), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 4 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v16 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v18 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (a6), zero |
| ; RV32V-NEXT: vlse64.v v14, (a5), zero |
| ; RV32V-NEXT: vlse64.v v16, (a4), zero |
| ; RV32V-NEXT: vlse64.v v18, (a3), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a3, vlenb |
| ; RV32V-NEXT: slli a3, a3, 1 |
| ; RV32V-NEXT: add a3, sp, a3 |
| ; RV32V-NEXT: addi a3, a3, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a3, vlenb |
| ; RV32V-NEXT: slli a3, a3, 1 |
| ; RV32V-NEXT: mv a4, a3 |
| ; RV32V-NEXT: slli a3, a3, 1 |
| ; RV32V-NEXT: add a4, a4, a3 |
| ; RV32V-NEXT: slli a3, a3, 1 |
| ; RV32V-NEXT: add a3, a3, a4 |
| ; RV32V-NEXT: add a3, sp, a3 |
| ; RV32V-NEXT: addi a3, a3, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v16 |
| ; RV32V-NEXT: csrr a3, vlenb |
| ; RV32V-NEXT: slli a3, a3, 3 |
| ; RV32V-NEXT: mv a4, a3 |
| ; RV32V-NEXT: slli a3, a3, 1 |
| ; RV32V-NEXT: add a3, a3, a4 |
| ; RV32V-NEXT: add a3, sp, a3 |
| ; RV32V-NEXT: addi a3, a3, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v18 |
| ; RV32V-NEXT: csrr a3, vlenb |
| ; RV32V-NEXT: slli a3, a3, 1 |
| ; RV32V-NEXT: mv a4, a3 |
| ; RV32V-NEXT: slli a3, a3, 4 |
| ; RV32V-NEXT: add a3, a3, a4 |
| ; RV32V-NEXT: add a3, sp, a3 |
| ; RV32V-NEXT: addi a3, a3, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (a2), zero |
| ; RV32V-NEXT: vlse64.v v14, (a1), zero |
| ; RV32V-NEXT: vlse64.v v16, (s7), zero |
| ; RV32V-NEXT: vlse64.v v18, (s2), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: addi a1, sp, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v16 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v18 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 5 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v14, (s5), zero |
| ; RV32V-NEXT: vlse64.v v16, (s3), zero |
| ; RV32V-NEXT: vlse64.v v18, (s1), zero |
| ; RV32V-NEXT: vlse64.v v12, (s0), zero |
| ; RV32V-NEXT: vand.vv v14, v10, v14 |
| ; RV32V-NEXT: vand.vv v16, v10, v16 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v18, v10, v18 |
| ; RV32V-NEXT: vand.vv v16, v10, v12 |
| ; RV32V-NEXT: vand.vx v10, v10, a0 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v10, v2, v10 |
| ; RV32V-NEXT: vmul.vv v12, v8, v26 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v4 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v30 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v20 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v28 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v24 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v0 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v6 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v22 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v14 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v18 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v8, v8, v16 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i64_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV64V-NEXT: vand.vi v12, v10, 2 |
| ; RV64V-NEXT: vand.vi v14, v10, 1 |
| ; RV64V-NEXT: vmul.vv v12, v8, v12 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v14, v12 |
| ; RV64V-NEXT: vand.vi v14, v10, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vi v14, v10, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: li a1, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: li a1, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: li a1, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: li a2, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a2 |
| ; RV64V-NEXT: slli a1, a0, 11 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 2 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 4 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 8 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 16 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 64 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 128 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 256 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 512 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 1024 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 2048 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 4096 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 8192 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 16384 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 32768 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 65536 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 131072 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: lui a1, 262144 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 31 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 32 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 33 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 34 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 35 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 36 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 37 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 38 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 39 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 40 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 41 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 42 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 43 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 44 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 45 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 46 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 47 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 48 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 49 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 50 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 51 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 52 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 53 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 54 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 55 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 56 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 57 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 58 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 59 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 60 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: slli a1, a0, 61 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a1 |
| ; RV64V-NEXT: li a1, -1 |
| ; RV64V-NEXT: slli a0, a0, 62 |
| ; RV64V-NEXT: slli a1, a1, 63 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vand.vx v14, v10, a0 |
| ; RV64V-NEXT: vand.vx v10, v10, a1 |
| ; RV64V-NEXT: vmul.vv v14, v8, v14 |
| ; RV64V-NEXT: vxor.vv v12, v12, v14 |
| ; RV64V-NEXT: vmul.vv v8, v8, v10 |
| ; RV64V-NEXT: vxor.vv v8, v12, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i64_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v10 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i64_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v8, v10 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i64_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lui a1, 524288 |
| ; RV32ZVBC32-NEXT: li s2, 1 |
| ; RV32ZVBC32-NEXT: li a3, 2 |
| ; RV32ZVBC32-NEXT: li a2, 4 |
| ; RV32ZVBC32-NEXT: li s7, 8 |
| ; RV32ZVBC32-NEXT: li a0, 16 |
| ; RV32ZVBC32-NEXT: li s6, 32 |
| ; RV32ZVBC32-NEXT: li s5, 64 |
| ; RV32ZVBC32-NEXT: li s3, 128 |
| ; RV32ZVBC32-NEXT: li s1, 256 |
| ; RV32ZVBC32-NEXT: li s0, 512 |
| ; RV32ZVBC32-NEXT: li t5, 1024 |
| ; RV32ZVBC32-NEXT: lui ra, 1 |
| ; RV32ZVBC32-NEXT: lui s8, 2 |
| ; RV32ZVBC32-NEXT: lui s10, 4 |
| ; RV32ZVBC32-NEXT: lui s11, 8 |
| ; RV32ZVBC32-NEXT: lui s9, 16 |
| ; RV32ZVBC32-NEXT: lui s4, 32 |
| ; RV32ZVBC32-NEXT: lui t6, 64 |
| ; RV32ZVBC32-NEXT: lui t4, 128 |
| ; RV32ZVBC32-NEXT: lui t3, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a6, 8192 |
| ; RV32ZVBC32-NEXT: lui a5, 16384 |
| ; RV32ZVBC32-NEXT: lui a4, 32768 |
| ; RV32ZVBC32-NEXT: sw a1, 16(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a3, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 252(sp) |
| ; RV32ZVBC32-NEXT: vsetvli s7, zero, e64, m2, ta, ma |
| ; RV32ZVBC32-NEXT: vand.vi v24, v10, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v20, v10, 1 |
| ; RV32ZVBC32-NEXT: vand.vi v26, v10, 4 |
| ; RV32ZVBC32-NEXT: vand.vi v14, v10, 8 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 244(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v12, v10, a0 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 236(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v16, v10, s6 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 272 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 228(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, s5 |
| ; RV32ZVBC32-NEXT: addi s5, sp, 264 |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 220(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v0, v10, s3 |
| ; RV32ZVBC32-NEXT: addi s3, sp, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 212(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v6, v10, s1 |
| ; RV32ZVBC32-NEXT: addi s1, sp, 248 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 204(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v4, v10, s0 |
| ; RV32ZVBC32-NEXT: addi s0, sp, 240 |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 196(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v2, v10, t5 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 11 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v10, ra |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 180(sp) |
| ; RV32ZVBC32-NEXT: addi t5, sp, 224 |
| ; RV32ZVBC32-NEXT: vand.vx v30, v10, s8 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 172(sp) |
| ; RV32ZVBC32-NEXT: addi s8, sp, 216 |
| ; RV32ZVBC32-NEXT: vand.vx v22, v10, s10 |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 164(sp) |
| ; RV32ZVBC32-NEXT: addi s10, sp, 208 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v10, s11 |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 156(sp) |
| ; RV32ZVBC32-NEXT: addi s11, sp, 200 |
| ; RV32ZVBC32-NEXT: vmul.vv v26, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v26 |
| ; RV32ZVBC32-NEXT: vand.vx v26, v10, s9 |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 148(sp) |
| ; RV32ZVBC32-NEXT: addi s9, sp, 192 |
| ; RV32ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v20, v14 |
| ; RV32ZVBC32-NEXT: vand.vx v20, v10, s4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv ra, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, ra |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v20, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 140(sp) |
| ; RV32ZVBC32-NEXT: addi s4, sp, 184 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v14, v12 |
| ; RV32ZVBC32-NEXT: vand.vx v14, v10, t6 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 132(sp) |
| ; RV32ZVBC32-NEXT: addi t6, sp, 176 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v10, t4 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 124(sp) |
| ; RV32ZVBC32-NEXT: addi t4, sp, 168 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v12, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v12, v10, t3 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 116(sp) |
| ; RV32ZVBC32-NEXT: addi t3, sp, 160 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v20 |
| ; RV32ZVBC32-NEXT: vand.vx v20, v10, t2 |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: addi t2, sp, 152 |
| ; RV32ZVBC32-NEXT: vmul.vv v6, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v6 |
| ; RV32ZVBC32-NEXT: vand.vx v6, v10, t1 |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: addi t1, sp, 144 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v4 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v10, t0 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv ra, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add ra, ra, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, ra |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: addi t0, sp, 136 |
| ; RV32ZVBC32-NEXT: vmul.vv v2, v8, v2 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v2 |
| ; RV32ZVBC32-NEXT: vand.vx v2, v10, s2 |
| ; RV32ZVBC32-NEXT: addi ra, sp, 128 |
| ; RV32ZVBC32-NEXT: vmul.vv v2, v8, v2 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v2 |
| ; RV32ZVBC32-NEXT: vand.vx v2, v10, a7 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: addi a7, sp, 120 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v10, a6 |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 76(sp) |
| ; RV32ZVBC32-NEXT: addi a6, sp, 112 |
| ; RV32ZVBC32-NEXT: vmul.vv v30, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v30 |
| ; RV32ZVBC32-NEXT: vand.vx v30, v10, a5 |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 68(sp) |
| ; RV32ZVBC32-NEXT: addi a5, sp, 104 |
| ; RV32ZVBC32-NEXT: vmul.vv v22, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v22 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v10, a4 |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 60(sp) |
| ; RV32ZVBC32-NEXT: addi a4, sp, 96 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v10, a3 |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 52(sp) |
| ; RV32ZVBC32-NEXT: addi a3, sp, 88 |
| ; RV32ZVBC32-NEXT: vmul.vv v26, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v18, v18, v26 |
| ; RV32ZVBC32-NEXT: vand.vx v26, v10, a2 |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 44(sp) |
| ; RV32ZVBC32-NEXT: addi a2, sp, 80 |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: sw a0, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a1, sp, 72 |
| ; RV32ZVBC32-NEXT: sw a6, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: mv s2, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: add a6, a6, s2 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v22, (a6) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v18, v0 |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (s7), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: mv s2, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: add a6, a6, s2 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s7, sp, 64 |
| ; RV32ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v0, v14 |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (s6), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: mv s2, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: add a6, a6, s2 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s2, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v14, v16 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s5), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: mv s5, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 4 |
| ; RV32ZVBC32-NEXT: add a6, a6, s5 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v16, (a6) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s5, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v14, v12 |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (s3), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 5 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v14, (a6) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s3, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v12, v20 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s1), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: mv s1, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a6, a6, s1 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s1, sp, 32 |
| ; RV32ZVBC32-NEXT: vmul.vv v6, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v6 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s0), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: mv s0, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a6, a6, s0 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s0, sp, 24 |
| ; RV32ZVBC32-NEXT: csrr s6, vlenb |
| ; RV32ZVBC32-NEXT: slli s6, s6, 1 |
| ; RV32ZVBC32-NEXT: mv a6, s6 |
| ; RV32ZVBC32-NEXT: slli s6, s6, 1 |
| ; RV32ZVBC32-NEXT: add a6, a6, s6 |
| ; RV32ZVBC32-NEXT: slli s6, s6, 3 |
| ; RV32ZVBC32-NEXT: add s6, s6, a6 |
| ; RV32ZVBC32-NEXT: lw a6, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: add s6, sp, s6 |
| ; RV32ZVBC32-NEXT: addi s6, s6, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (s6) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v6, v8, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v2, v8, v2 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vmul.vv v30, v8, v30 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v26, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v6 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s6), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v2 |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (t5), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v4 |
| ; RV32ZVBC32-NEXT: vlse64.v v22, (s8), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v30 |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (s10), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v28 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s11), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v24 |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (s9), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v20, v26 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s4), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v26, v10, v20 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v4, v10, v20 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 4 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v30, v10, v20 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 5 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v20, v10, v20 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v28, v10, v24 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v10, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v0, v10, v0 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v10, v6 |
| ; RV32ZVBC32-NEXT: vand.vv v22, v10, v22 |
| ; RV32ZVBC32-NEXT: vand.vv v18, v10, v18 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v10, v16 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v16, (t5) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v14, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v14, (t5) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s4, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, s4 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (t5) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (t6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (t4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (t3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (t2), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: mv t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, t3 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: mv t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: add t2, t2, t3 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v16 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: mv t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, t3 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v18 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: mv t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: add t2, t2, t3 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (t1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (t0), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (ra), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (a7), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 4 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v16 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v18 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (a5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (a3), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a3, vlenb |
| ; RV32ZVBC32-NEXT: slli a3, a3, 1 |
| ; RV32ZVBC32-NEXT: add a3, sp, a3 |
| ; RV32ZVBC32-NEXT: addi a3, a3, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a3, vlenb |
| ; RV32ZVBC32-NEXT: slli a3, a3, 1 |
| ; RV32ZVBC32-NEXT: mv a4, a3 |
| ; RV32ZVBC32-NEXT: slli a3, a3, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a3 |
| ; RV32ZVBC32-NEXT: slli a3, a3, 1 |
| ; RV32ZVBC32-NEXT: add a3, a3, a4 |
| ; RV32ZVBC32-NEXT: add a3, sp, a3 |
| ; RV32ZVBC32-NEXT: addi a3, a3, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v16 |
| ; RV32ZVBC32-NEXT: csrr a3, vlenb |
| ; RV32ZVBC32-NEXT: slli a3, a3, 3 |
| ; RV32ZVBC32-NEXT: mv a4, a3 |
| ; RV32ZVBC32-NEXT: slli a3, a3, 1 |
| ; RV32ZVBC32-NEXT: add a3, a3, a4 |
| ; RV32ZVBC32-NEXT: add a3, sp, a3 |
| ; RV32ZVBC32-NEXT: addi a3, a3, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v18 |
| ; RV32ZVBC32-NEXT: csrr a3, vlenb |
| ; RV32ZVBC32-NEXT: slli a3, a3, 1 |
| ; RV32ZVBC32-NEXT: mv a4, a3 |
| ; RV32ZVBC32-NEXT: slli a3, a3, 4 |
| ; RV32ZVBC32-NEXT: add a3, a3, a4 |
| ; RV32ZVBC32-NEXT: add a3, sp, a3 |
| ; RV32ZVBC32-NEXT: addi a3, a3, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (a1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s7), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (s2), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: addi a1, sp, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v16 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v18 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 5 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (s5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (s1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s0), zero |
| ; RV32ZVBC32-NEXT: vand.vv v14, v10, v14 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v10, v16 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v18, v10, v18 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v10, v12 |
| ; RV32ZVBC32-NEXT: vand.vx v10, v10, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v2, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v10, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i64_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e64, m2, ta, ma |
| ; RV64ZVBC32-NEXT: vand.vi v12, v10, 2 |
| ; RV64ZVBC32-NEXT: vand.vi v14, v10, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v14, v12 |
| ; RV64ZVBC32-NEXT: vand.vi v14, v10, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vi v14, v10, 8 |
| ; RV64ZVBC32-NEXT: li a0, 16 |
| ; RV64ZVBC32-NEXT: li a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a0 |
| ; RV64ZVBC32-NEXT: li a0, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: li a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a0 |
| ; RV64ZVBC32-NEXT: li a0, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: li a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a0 |
| ; RV64ZVBC32-NEXT: li a2, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: li a0, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a2 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 11 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2048 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4096 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8192 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16384 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32768 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 65536 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 131072 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 262144 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 31 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 33 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 34 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 35 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 36 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 37 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 38 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 39 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 40 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 41 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 42 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 43 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 44 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 45 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 46 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 47 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 48 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 49 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 50 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 51 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 52 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 53 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 54 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 55 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 56 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 57 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 58 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 59 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 60 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 61 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a1 |
| ; RV64ZVBC32-NEXT: li a1, -1 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 62 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 63 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vand.vx v14, v10, a0 |
| ; RV64ZVBC32-NEXT: vand.vx v10, v10, a1 |
| ; RV64ZVBC32-NEXT: vmul.vv v14, v8, v14 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v14 |
| ; RV64ZVBC32-NEXT: vmul.vv v8, v8, v10 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v12, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) |
| ret <vscale x 2 x i64> %v |
| } |
| |
| define <vscale x 2 x i64> @clmul_nxv2i64_vx(<vscale x 2 x i64> %va, i64 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv2i64_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 2 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a3, a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 2 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: sub sp, sp, a2 |
| ; RV32V-NEXT: sw a0, 8(sp) |
| ; RV32V-NEXT: sw a1, 12(sp) |
| ; RV32V-NEXT: addi a0, sp, 8 |
| ; RV32V-NEXT: lui s5, 524288 |
| ; RV32V-NEXT: li s11, 1 |
| ; RV32V-NEXT: li t2, 2 |
| ; RV32V-NEXT: li s6, 4 |
| ; RV32V-NEXT: li ra, 8 |
| ; RV32V-NEXT: li s10, 128 |
| ; RV32V-NEXT: li s9, 256 |
| ; RV32V-NEXT: li s8, 512 |
| ; RV32V-NEXT: li s7, 1024 |
| ; RV32V-NEXT: lui s4, 1 |
| ; RV32V-NEXT: lui s3, 2 |
| ; RV32V-NEXT: lui s2, 4 |
| ; RV32V-NEXT: lui s1, 8 |
| ; RV32V-NEXT: lui s0, 16 |
| ; RV32V-NEXT: lui t6, 32 |
| ; RV32V-NEXT: lui t5, 64 |
| ; RV32V-NEXT: lui t4, 128 |
| ; RV32V-NEXT: lui t3, 256 |
| ; RV32V-NEXT: lui t0, 512 |
| ; RV32V-NEXT: lui a7, 1024 |
| ; RV32V-NEXT: lui a6, 2048 |
| ; RV32V-NEXT: lui a5, 4096 |
| ; RV32V-NEXT: lui a4, 8192 |
| ; RV32V-NEXT: lui a3, 16384 |
| ; RV32V-NEXT: vsetvli a1, zero, e64, m2, ta, ma |
| ; RV32V-NEXT: vlse64.v v10, (a0), zero |
| ; RV32V-NEXT: lui a2, 32768 |
| ; RV32V-NEXT: sw s5, 16(sp) |
| ; RV32V-NEXT: lui t1, 524288 |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s11, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw t2, 268(sp) |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw s6, 260(sp) |
| ; RV32V-NEXT: lui t2, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw ra, 252(sp) |
| ; RV32V-NEXT: lui a1, 262144 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: li s6, 16 |
| ; RV32V-NEXT: sw s6, 244(sp) |
| ; RV32V-NEXT: li s6, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: li s5, 32 |
| ; RV32V-NEXT: sw s5, 236(sp) |
| ; RV32V-NEXT: li s5, 32 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: li ra, 64 |
| ; RV32V-NEXT: sw ra, 228(sp) |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s10, 220(sp) |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s9, 212(sp) |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s8, 204(sp) |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s7, 196(sp) |
| ; RV32V-NEXT: slli s11, s11, 11 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw s11, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s4, 180(sp) |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s3, 172(sp) |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s2, 164(sp) |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s1, 156(sp) |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s0, 148(sp) |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw t6, 140(sp) |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t5, 132(sp) |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t4, 124(sp) |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t3, 116(sp) |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t0, 108(sp) |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw a7, 100(sp) |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw a6, 92(sp) |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a5, 84(sp) |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a4, 76(sp) |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a3, 68(sp) |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a2, 60(sp) |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a0, 52(sp) |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw t2, 44(sp) |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: sw a1, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw t1, 28(sp) |
| ; RV32V-NEXT: addi a1, sp, 16 |
| ; RV32V-NEXT: vlse64.v v12, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 272 |
| ; RV32V-NEXT: vlse64.v v22, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 264 |
| ; RV32V-NEXT: vlse64.v v14, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 256 |
| ; RV32V-NEXT: vlse64.v v2, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 248 |
| ; RV32V-NEXT: vlse64.v v26, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 240 |
| ; RV32V-NEXT: vlse64.v v28, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 232 |
| ; RV32V-NEXT: vlse64.v v30, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 224 |
| ; RV32V-NEXT: vlse64.v v16, (a1), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: mv a5, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a5, a5, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, a1, a5 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi a1, sp, 216 |
| ; RV32V-NEXT: vlse64.v v16, (a1), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a5, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, a1, a5 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi a1, sp, 208 |
| ; RV32V-NEXT: vlse64.v v16, (a1), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: mv a5, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a5, a5, a1 |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: add a1, a1, a5 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: addi a1, sp, 200 |
| ; RV32V-NEXT: vlse64.v v6, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 192 |
| ; RV32V-NEXT: vlse64.v v4, (a1), zero |
| ; RV32V-NEXT: addi ra, sp, 184 |
| ; RV32V-NEXT: vand.vi v16, v10, 2 |
| ; RV32V-NEXT: vand.vi v18, v10, 1 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v18, v16 |
| ; RV32V-NEXT: vand.vi v18, v10, 4 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vi v18, v10, 8 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, s6 |
| ; RV32V-NEXT: addi s10, sp, 176 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, s5 |
| ; RV32V-NEXT: addi s9, sp, 168 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: li a1, 64 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s8, sp, 160 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: li a1, 128 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s7, sp, 152 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: li a1, 256 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s6, sp, 144 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: li a1, 512 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s5, sp, 136 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: li a1, 1024 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s4, sp, 128 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, s11 |
| ; RV32V-NEXT: addi s11, sp, 120 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 1 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s3, sp, 112 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 2 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s2, sp, 104 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 4 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s1, sp, 96 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 8 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi s0, sp, 88 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 16 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi t6, sp, 80 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 32 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi t5, sp, 72 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 64 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi t4, sp, 64 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 128 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi t3, sp, 56 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 256 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: addi t2, sp, 48 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, t0 |
| ; RV32V-NEXT: addi t1, sp, 40 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, a7 |
| ; RV32V-NEXT: addi a7, sp, 32 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, a6 |
| ; RV32V-NEXT: addi a5, sp, 24 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a1, 4096 |
| ; RV32V-NEXT: vand.vx v18, v10, a1 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, a4 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, a3 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, a2 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: vand.vx v18, v10, a0 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: lui a0, 131072 |
| ; RV32V-NEXT: vand.vx v18, v10, a0 |
| ; RV32V-NEXT: vmul.vv v18, v8, v18 |
| ; RV32V-NEXT: vxor.vv v20, v16, v18 |
| ; RV32V-NEXT: vlse64.v v0, (ra), zero |
| ; RV32V-NEXT: vand.vv v16, v10, v12 |
| ; RV32V-NEXT: vand.vv v18, v10, v22 |
| ; RV32V-NEXT: vand.vv v22, v10, v14 |
| ; RV32V-NEXT: vand.vv v24, v10, v2 |
| ; RV32V-NEXT: vand.vv v26, v10, v26 |
| ; RV32V-NEXT: vand.vv v28, v10, v28 |
| ; RV32V-NEXT: vand.vv v30, v10, v30 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v2, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v6 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v0 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v6, (s10), zero |
| ; RV32V-NEXT: vlse64.v v4, (s9), zero |
| ; RV32V-NEXT: vlse64.v v12, (s8), zero |
| ; RV32V-NEXT: vlse64.v v14, (s7), zero |
| ; RV32V-NEXT: vand.vv v0, v10, v6 |
| ; RV32V-NEXT: vand.vv v6, v10, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (s6), zero |
| ; RV32V-NEXT: vlse64.v v14, (s5), zero |
| ; RV32V-NEXT: vlse64.v v6, (s4), zero |
| ; RV32V-NEXT: vlse64.v v4, (s11), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v6 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (s3), zero |
| ; RV32V-NEXT: vlse64.v v14, (s2), zero |
| ; RV32V-NEXT: vlse64.v v6, (s1), zero |
| ; RV32V-NEXT: vlse64.v v4, (s0), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v6 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v12, (t6), zero |
| ; RV32V-NEXT: vlse64.v v14, (t5), zero |
| ; RV32V-NEXT: vlse64.v v4, (t4), zero |
| ; RV32V-NEXT: vlse64.v v6, (t3), zero |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v14 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v6 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v14, (t2), zero |
| ; RV32V-NEXT: vlse64.v v6, (t1), zero |
| ; RV32V-NEXT: vlse64.v v12, (a7), zero |
| ; RV32V-NEXT: vlse64.v v4, (a5), zero |
| ; RV32V-NEXT: vand.vv v14, v10, v14 |
| ; RV32V-NEXT: vand.vv v6, v10, v6 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v12, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v6, v10, v4 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vand.vx v10, v10, a0 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v10, v20, v10 |
| ; RV32V-NEXT: vmul.vv v12, v8, v16 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v18 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v22 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v24 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v26 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v28 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v30 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v2 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v0 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v14 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v10, v10, v12 |
| ; RV32V-NEXT: vmul.vv v8, v8, v6 |
| ; RV32V-NEXT: vxor.vv v8, v10, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv2i64_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: andi a1, a0, 2 |
| ; RV64V-NEXT: andi a2, a0, 1 |
| ; RV64V-NEXT: vsetvli a3, zero, e64, m2, ta, ma |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 4 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 8 |
| ; RV64V-NEXT: vxor.vv v10, v12, v10 |
| ; RV64V-NEXT: vmul.vx v12, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 16 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 32 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 64 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 128 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 256 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 512 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a1 |
| ; RV64V-NEXT: andi a3, a0, 1024 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: li a1, 1 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a3 |
| ; RV64V-NEXT: slli a2, a1, 11 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 1 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 2 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 4 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 8 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 16 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 64 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 128 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 256 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 512 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 1024 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 2048 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 4096 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 8192 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 16384 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 32768 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 65536 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 131072 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: lui a2, 262144 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: srliw a2, a0, 31 |
| ; RV64V-NEXT: slli a2, a2, 31 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 33 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 34 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 35 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 36 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 37 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 38 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 39 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 40 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 41 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 42 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 43 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 44 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 45 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 46 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 47 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 48 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 49 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 50 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 51 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 52 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 53 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 54 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 55 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 56 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 57 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 58 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 59 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 60 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 61 |
| ; RV64V-NEXT: slli a1, a1, 62 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: and a1, a0, a1 |
| ; RV64V-NEXT: srli a0, a0, 63 |
| ; RV64V-NEXT: slli a0, a0, 63 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a2 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v12, v8, a1 |
| ; RV64V-NEXT: vxor.vv v10, v10, v12 |
| ; RV64V-NEXT: vmul.vx v8, v8, a0 |
| ; RV64V-NEXT: vxor.vv v8, v10, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv2i64_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: addi sp, sp, -16 |
| ; RV32ZVBC64-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC64-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC64-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e64, m2, ta, ma |
| ; RV32ZVBC64-NEXT: vlse64.v v10, (a0), zero |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v10 |
| ; RV32ZVBC64-NEXT: addi sp, sp, 16 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv2i64_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e64, m2, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv2i64_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 2 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a3, a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 2 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a2 |
| ; RV32ZVBC32-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC32-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC32-NEXT: lui s5, 524288 |
| ; RV32ZVBC32-NEXT: li s11, 1 |
| ; RV32ZVBC32-NEXT: li t2, 2 |
| ; RV32ZVBC32-NEXT: li s6, 4 |
| ; RV32ZVBC32-NEXT: li ra, 8 |
| ; RV32ZVBC32-NEXT: li s10, 128 |
| ; RV32ZVBC32-NEXT: li s9, 256 |
| ; RV32ZVBC32-NEXT: li s8, 512 |
| ; RV32ZVBC32-NEXT: li s7, 1024 |
| ; RV32ZVBC32-NEXT: lui s4, 1 |
| ; RV32ZVBC32-NEXT: lui s3, 2 |
| ; RV32ZVBC32-NEXT: lui s2, 4 |
| ; RV32ZVBC32-NEXT: lui s1, 8 |
| ; RV32ZVBC32-NEXT: lui s0, 16 |
| ; RV32ZVBC32-NEXT: lui t6, 32 |
| ; RV32ZVBC32-NEXT: lui t5, 64 |
| ; RV32ZVBC32-NEXT: lui t4, 128 |
| ; RV32ZVBC32-NEXT: lui t3, 256 |
| ; RV32ZVBC32-NEXT: lui t0, 512 |
| ; RV32ZVBC32-NEXT: lui a7, 1024 |
| ; RV32ZVBC32-NEXT: lui a6, 2048 |
| ; RV32ZVBC32-NEXT: lui a5, 4096 |
| ; RV32ZVBC32-NEXT: lui a4, 8192 |
| ; RV32ZVBC32-NEXT: lui a3, 16384 |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e64, m2, ta, ma |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (a0), zero |
| ; RV32ZVBC32-NEXT: lui a2, 32768 |
| ; RV32ZVBC32-NEXT: sw s5, 16(sp) |
| ; RV32ZVBC32-NEXT: lui t1, 524288 |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 260(sp) |
| ; RV32ZVBC32-NEXT: lui t2, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 252(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: li s6, 16 |
| ; RV32ZVBC32-NEXT: sw s6, 244(sp) |
| ; RV32ZVBC32-NEXT: li s6, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: li s5, 32 |
| ; RV32ZVBC32-NEXT: sw s5, 236(sp) |
| ; RV32ZVBC32-NEXT: li s5, 32 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: li ra, 64 |
| ; RV32ZVBC32-NEXT: sw ra, 228(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 220(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 212(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 204(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 196(sp) |
| ; RV32ZVBC32-NEXT: slli s11, s11, 11 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 180(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 172(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 164(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 156(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 148(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 140(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 132(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 124(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 116(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 108(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 100(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 92(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 84(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 76(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 68(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 60(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 52(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 44(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a1, sp, 16 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 272 |
| ; RV32ZVBC32-NEXT: vlse64.v v22, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 264 |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 256 |
| ; RV32ZVBC32-NEXT: vlse64.v v2, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 248 |
| ; RV32ZVBC32-NEXT: vlse64.v v26, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v30, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 224 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: mv a5, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a5, a5, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a5 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a1, sp, 216 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a5, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a5 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a1, sp, 208 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: mv a5, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a5, a5, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: add a1, a1, a5 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a1, sp, 200 |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 192 |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a1), zero |
| ; RV32ZVBC32-NEXT: addi ra, sp, 184 |
| ; RV32ZVBC32-NEXT: vand.vi v16, v10, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v18, v10, 1 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v18, v16 |
| ; RV32ZVBC32-NEXT: vand.vi v18, v10, 4 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vi v18, v10, 8 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, s6 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 176 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, s5 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 168 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: li a1, 64 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 160 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: li a1, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 152 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: li a1, 256 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 144 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: li a1, 512 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s5, sp, 136 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: li a1, 1024 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s4, sp, 128 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, s11 |
| ; RV32ZVBC32-NEXT: addi s11, sp, 120 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 1 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s3, sp, 112 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 2 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s2, sp, 104 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 4 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s1, sp, 96 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 8 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi s0, sp, 88 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 16 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi t6, sp, 80 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 32 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi t5, sp, 72 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 64 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi t4, sp, 64 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi t3, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 256 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: addi t2, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, t0 |
| ; RV32ZVBC32-NEXT: addi t1, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a7 |
| ; RV32ZVBC32-NEXT: addi a7, sp, 32 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a6 |
| ; RV32ZVBC32-NEXT: addi a5, sp, 24 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a1, 4096 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a4 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a3 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a2 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: lui a0, 131072 |
| ; RV32ZVBC32-NEXT: vand.vx v18, v10, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v16, v18 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (ra), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v10, v12 |
| ; RV32ZVBC32-NEXT: vand.vv v18, v10, v22 |
| ; RV32ZVBC32-NEXT: vand.vv v22, v10, v14 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v10, v2 |
| ; RV32ZVBC32-NEXT: vand.vv v26, v10, v26 |
| ; RV32ZVBC32-NEXT: vand.vv v28, v10, v28 |
| ; RV32ZVBC32-NEXT: vand.vv v30, v10, v30 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v2, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v6 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v0 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (s10), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s9), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (s7), zero |
| ; RV32ZVBC32-NEXT: vand.vv v0, v10, v6 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v10, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (s5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (s4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s11), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v6 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (s2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (s1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s0), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v6 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (t6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (t5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (t4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (t3), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v14 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v6 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v14, (t2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (t1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a7), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a5), zero |
| ; RV32ZVBC32-NEXT: vand.vv v14, v10, v14 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v10, v6 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v6, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v12, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v6, v10, v4 |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: vand.vx v10, v10, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v20, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v2 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v10, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv2i64_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: andi a1, a0, 2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 1 |
| ; RV64ZVBC32-NEXT: vsetvli a3, zero, e64, m2, ta, ma |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 4 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 8 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v12, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 16 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 32 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 64 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 128 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 256 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 512 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a3, a0, 1024 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: li a1, 1 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a3 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 11 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 64 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 128 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 256 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 512 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1024 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2048 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4096 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8192 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16384 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32768 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 65536 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 131072 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 262144 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: srliw a2, a0, 31 |
| ; RV64ZVBC32-NEXT: slli a2, a2, 31 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 33 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 34 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 35 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 36 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 37 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 38 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 39 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 40 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 41 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 42 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 43 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 44 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 45 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 46 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 47 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 48 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 49 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 50 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 51 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 52 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 53 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 54 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 55 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 56 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 57 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 58 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 59 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 60 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 61 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 62 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: and a1, a0, a1 |
| ; RV64ZVBC32-NEXT: srli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a1 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v10, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 |
| %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer |
| %v = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) |
| ret <vscale x 2 x i64> %v |
| } |
| |
| define <vscale x 4 x i64> @clmul_nxv4i64_vv(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i64_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: sub sp, sp, a0 |
| ; RV32V-NEXT: lui a1, 524288 |
| ; RV32V-NEXT: li s5, 1 |
| ; RV32V-NEXT: li a3, 2 |
| ; RV32V-NEXT: li a2, 4 |
| ; RV32V-NEXT: li a0, 8 |
| ; RV32V-NEXT: li s3, 16 |
| ; RV32V-NEXT: li s2, 32 |
| ; RV32V-NEXT: li s4, 64 |
| ; RV32V-NEXT: li s6, 128 |
| ; RV32V-NEXT: li s8, 256 |
| ; RV32V-NEXT: li s0, 512 |
| ; RV32V-NEXT: li s7, 1024 |
| ; RV32V-NEXT: lui ra, 1 |
| ; RV32V-NEXT: lui s11, 2 |
| ; RV32V-NEXT: lui s10, 4 |
| ; RV32V-NEXT: lui s9, 8 |
| ; RV32V-NEXT: lui s1, 16 |
| ; RV32V-NEXT: lui t6, 32 |
| ; RV32V-NEXT: lui t5, 64 |
| ; RV32V-NEXT: lui t4, 128 |
| ; RV32V-NEXT: lui t3, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a6, 8192 |
| ; RV32V-NEXT: lui a5, 16384 |
| ; RV32V-NEXT: lui a4, 32768 |
| ; RV32V-NEXT: sw a1, 16(sp) |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s5, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a3, 268(sp) |
| ; RV32V-NEXT: lui a3, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw a2, 260(sp) |
| ; RV32V-NEXT: lui a2, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw a0, 252(sp) |
| ; RV32V-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV32V-NEXT: vand.vi v28, v12, 2 |
| ; RV32V-NEXT: vand.vi v4, v12, 1 |
| ; RV32V-NEXT: vand.vi v24, v12, 4 |
| ; RV32V-NEXT: vand.vi v20, v12, 8 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: sw s3, 244(sp) |
| ; RV32V-NEXT: vand.vx v16, v12, s3 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: addi s3, sp, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: sw s2, 236(sp) |
| ; RV32V-NEXT: vand.vx v0, v12, s2 |
| ; RV32V-NEXT: addi s2, sp, 272 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw s4, 228(sp) |
| ; RV32V-NEXT: vmul.vv v16, v8, v28 |
| ; RV32V-NEXT: vmul.vv v28, v8, v4 |
| ; RV32V-NEXT: vxor.vv v28, v28, v16 |
| ; RV32V-NEXT: vand.vx v16, v12, s4 |
| ; RV32V-NEXT: addi s4, sp, 264 |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s6, 220(sp) |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v28, v28, v24 |
| ; RV32V-NEXT: vand.vx v24, v12, s6 |
| ; RV32V-NEXT: addi s6, sp, 256 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s8, 212(sp) |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v20, v28, v20 |
| ; RV32V-NEXT: vand.vx v28, v12, s8 |
| ; RV32V-NEXT: addi s8, sp, 248 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s0, 204(sp) |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vxor.vv v20, v20, v4 |
| ; RV32V-NEXT: vand.vx v4, v12, s0 |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s7, 196(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v20, v20, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, s7 |
| ; RV32V-NEXT: slli a0, s5, 11 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v20, v20, v16 |
| ; RV32V-NEXT: vand.vx v16, v12, ra |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw a0, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw ra, 180(sp) |
| ; RV32V-NEXT: addi s5, sp, 224 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v24, v20, v24 |
| ; RV32V-NEXT: vand.vx v20, v12, s11 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s11, 172(sp) |
| ; RV32V-NEXT: addi s11, sp, 216 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vxor.vv v28, v24, v28 |
| ; RV32V-NEXT: vand.vx v24, v12, s10 |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s10, 164(sp) |
| ; RV32V-NEXT: addi s10, sp, 208 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vxor.vv v4, v28, v4 |
| ; RV32V-NEXT: vand.vx v28, v12, s9 |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s9, 156(sp) |
| ; RV32V-NEXT: addi s9, sp, 200 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi ra, sp, 192 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v0, v4, v0 |
| ; RV32V-NEXT: vand.vx v4, v12, s1 |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s1, 148(sp) |
| ; RV32V-NEXT: addi s1, sp, 184 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v0, v0, v16 |
| ; RV32V-NEXT: vand.vx v16, v12, t6 |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw t6, 140(sp) |
| ; RV32V-NEXT: addi s0, sp, 176 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v0, v0, v20 |
| ; RV32V-NEXT: vand.vx v20, v12, t5 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t5, 132(sp) |
| ; RV32V-NEXT: addi t6, sp, 168 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v0, v0, v24 |
| ; RV32V-NEXT: vand.vx v24, v12, t4 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t4, 124(sp) |
| ; RV32V-NEXT: addi t5, sp, 160 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vxor.vv v0, v0, v28 |
| ; RV32V-NEXT: vand.vx v28, v12, t3 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t3, 116(sp) |
| ; RV32V-NEXT: addi t4, sp, 152 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vxor.vv v0, v0, v4 |
| ; RV32V-NEXT: vand.vx v4, v12, t2 |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: addi t3, sp, 144 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v16, v0, v16 |
| ; RV32V-NEXT: vand.vx v0, v12, t1 |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: addi t2, sp, 136 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v20, v16, v20 |
| ; RV32V-NEXT: vand.vx v16, v12, t0 |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: addi t1, sp, 128 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v24, v20, v24 |
| ; RV32V-NEXT: vand.vx v20, v12, a7 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: addi t0, sp, 120 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vxor.vv v24, v24, v28 |
| ; RV32V-NEXT: vand.vx v28, v12, a6 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a6, 76(sp) |
| ; RV32V-NEXT: addi a7, sp, 112 |
| ; RV32V-NEXT: vmul.vv v28, v8, v4 |
| ; RV32V-NEXT: vxor.vv v24, v24, v28 |
| ; RV32V-NEXT: vand.vx v28, v12, a5 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a5, 68(sp) |
| ; RV32V-NEXT: addi a6, sp, 104 |
| ; RV32V-NEXT: vmul.vv v28, v8, v0 |
| ; RV32V-NEXT: vxor.vv v28, v24, v28 |
| ; RV32V-NEXT: vand.vx v24, v12, a4 |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a4, 60(sp) |
| ; RV32V-NEXT: addi a5, sp, 96 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v16, v28, v16 |
| ; RV32V-NEXT: vand.vx v28, v12, a3 |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a3, 52(sp) |
| ; RV32V-NEXT: addi a4, sp, 88 |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v16, v16, v20 |
| ; RV32V-NEXT: vand.vx v4, v12, a2 |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a2, 44(sp) |
| ; RV32V-NEXT: addi a3, sp, 80 |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: lui a1, 262144 |
| ; RV32V-NEXT: sw a1, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: sw a0, 28(sp) |
| ; RV32V-NEXT: addi a2, sp, 72 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v20, v8, v20 |
| ; RV32V-NEXT: vxor.vv v20, v16, v20 |
| ; RV32V-NEXT: vlse64.v v16, (s3), zero |
| ; RV32V-NEXT: addi s3, sp, 64 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v0, v20, v0 |
| ; RV32V-NEXT: vlse64.v v20, (s2), zero |
| ; RV32V-NEXT: addi s2, sp, 56 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v0, v0, v24 |
| ; RV32V-NEXT: vlse64.v v24, (s4), zero |
| ; RV32V-NEXT: addi s4, sp, 48 |
| ; RV32V-NEXT: vmul.vv v28, v8, v28 |
| ; RV32V-NEXT: vxor.vv v0, v0, v28 |
| ; RV32V-NEXT: vlse64.v v28, (s6), zero |
| ; RV32V-NEXT: addi s6, sp, 40 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vxor.vv v4, v0, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v4, (s8), zero |
| ; RV32V-NEXT: addi s8, sp, 32 |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s7, s7, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: addi a0, sp, 24 |
| ; RV32V-NEXT: addi s7, sp, 240 |
| ; RV32V-NEXT: vlse64.v v16, (s7), zero |
| ; RV32V-NEXT: addi s7, sp, 232 |
| ; RV32V-NEXT: vlse64.v v20, (s7), zero |
| ; RV32V-NEXT: vlse64.v v24, (s5), zero |
| ; RV32V-NEXT: vlse64.v v28, (s11), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 4 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: mv s7, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s7, s7, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s5, s5, s7 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: mv s7, s5 |
| ; RV32V-NEXT: slli s5, s5, 4 |
| ; RV32V-NEXT: add s5, s5, s7 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: mv s7, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s7, s7, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s7, s7, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s5, s5, s7 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v20, (s10), zero |
| ; RV32V-NEXT: vlse64.v v24, (s9), zero |
| ; RV32V-NEXT: vlse64.v v28, (ra), zero |
| ; RV32V-NEXT: vlse64.v v4, (s1), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr s1, vlenb |
| ; RV32V-NEXT: slli s1, s1, 2 |
| ; RV32V-NEXT: mv s5, s1 |
| ; RV32V-NEXT: slli s1, s1, 1 |
| ; RV32V-NEXT: add s1, s1, s5 |
| ; RV32V-NEXT: add s1, sp, s1 |
| ; RV32V-NEXT: addi s1, s1, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr s1, vlenb |
| ; RV32V-NEXT: slli s1, s1, 3 |
| ; RV32V-NEXT: mv s5, s1 |
| ; RV32V-NEXT: slli s1, s1, 2 |
| ; RV32V-NEXT: add s1, s1, s5 |
| ; RV32V-NEXT: add s1, sp, s1 |
| ; RV32V-NEXT: addi s1, s1, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr s1, vlenb |
| ; RV32V-NEXT: slli s1, s1, 6 |
| ; RV32V-NEXT: add s1, sp, s1 |
| ; RV32V-NEXT: addi s1, s1, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v4 |
| ; RV32V-NEXT: csrr s1, vlenb |
| ; RV32V-NEXT: slli s1, s1, 3 |
| ; RV32V-NEXT: mv s5, s1 |
| ; RV32V-NEXT: slli s1, s1, 1 |
| ; RV32V-NEXT: add s5, s5, s1 |
| ; RV32V-NEXT: slli s1, s1, 2 |
| ; RV32V-NEXT: add s1, s1, s5 |
| ; RV32V-NEXT: add s1, sp, s1 |
| ; RV32V-NEXT: addi s1, s1, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s0), zero |
| ; RV32V-NEXT: vlse64.v v28, (t6), zero |
| ; RV32V-NEXT: vlse64.v v4, (t5), zero |
| ; RV32V-NEXT: vlse64.v v0, (t4), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr t4, vlenb |
| ; RV32V-NEXT: slli t4, t4, 3 |
| ; RV32V-NEXT: add t4, sp, t4 |
| ; RV32V-NEXT: addi t4, t4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr t4, vlenb |
| ; RV32V-NEXT: slli t4, t4, 2 |
| ; RV32V-NEXT: mv t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 3 |
| ; RV32V-NEXT: add t4, t4, t5 |
| ; RV32V-NEXT: add t4, sp, t4 |
| ; RV32V-NEXT: addi t4, t4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v4 |
| ; RV32V-NEXT: csrr t4, vlenb |
| ; RV32V-NEXT: slli t4, t4, 2 |
| ; RV32V-NEXT: mv t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 1 |
| ; RV32V-NEXT: add t5, t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 1 |
| ; RV32V-NEXT: add t5, t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 1 |
| ; RV32V-NEXT: add t4, t4, t5 |
| ; RV32V-NEXT: add t4, sp, t4 |
| ; RV32V-NEXT: addi t4, t4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v0 |
| ; RV32V-NEXT: csrr t4, vlenb |
| ; RV32V-NEXT: slli t4, t4, 2 |
| ; RV32V-NEXT: mv t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 2 |
| ; RV32V-NEXT: add t5, t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 2 |
| ; RV32V-NEXT: add t4, t4, t5 |
| ; RV32V-NEXT: add t4, sp, t4 |
| ; RV32V-NEXT: addi t4, t4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v28, (t3), zero |
| ; RV32V-NEXT: vlse64.v v4, (t2), zero |
| ; RV32V-NEXT: vlse64.v v0, (t1), zero |
| ; RV32V-NEXT: vlse64.v v16, (t0), zero |
| ; RV32V-NEXT: vand.vv v20, v12, v28 |
| ; RV32V-NEXT: csrr t0, vlenb |
| ; RV32V-NEXT: slli t0, t0, 2 |
| ; RV32V-NEXT: add t0, sp, t0 |
| ; RV32V-NEXT: addi t0, t0, 288 |
| ; RV32V-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v20, v12, v4 |
| ; RV32V-NEXT: csrr t0, vlenb |
| ; RV32V-NEXT: slli t0, t0, 5 |
| ; RV32V-NEXT: add t0, sp, t0 |
| ; RV32V-NEXT: addi t0, t0, 288 |
| ; RV32V-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v20, v12, v0 |
| ; RV32V-NEXT: csrr t0, vlenb |
| ; RV32V-NEXT: slli t0, t0, 3 |
| ; RV32V-NEXT: mv t1, t0 |
| ; RV32V-NEXT: slli t0, t0, 1 |
| ; RV32V-NEXT: add t1, t1, t0 |
| ; RV32V-NEXT: slli t0, t0, 1 |
| ; RV32V-NEXT: add t0, t0, t1 |
| ; RV32V-NEXT: add t0, sp, t0 |
| ; RV32V-NEXT: addi t0, t0, 288 |
| ; RV32V-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: csrr t0, vlenb |
| ; RV32V-NEXT: slli t0, t0, 4 |
| ; RV32V-NEXT: mv t1, t0 |
| ; RV32V-NEXT: slli t0, t0, 2 |
| ; RV32V-NEXT: add t0, t0, t1 |
| ; RV32V-NEXT: add t0, sp, t0 |
| ; RV32V-NEXT: addi t0, t0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (t0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v16, (a7), zero |
| ; RV32V-NEXT: vlse64.v v0, (a6), zero |
| ; RV32V-NEXT: vlse64.v v20, (a5), zero |
| ; RV32V-NEXT: vlse64.v v24, (a4), zero |
| ; RV32V-NEXT: vand.vv v4, v12, v16 |
| ; RV32V-NEXT: vand.vv v16, v12, v0 |
| ; RV32V-NEXT: csrr a4, vlenb |
| ; RV32V-NEXT: slli a4, a4, 2 |
| ; RV32V-NEXT: mv a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 1 |
| ; RV32V-NEXT: add a5, a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 1 |
| ; RV32V-NEXT: add a4, a4, a5 |
| ; RV32V-NEXT: add a4, sp, a4 |
| ; RV32V-NEXT: addi a4, a4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a4, vlenb |
| ; RV32V-NEXT: slli a4, a4, 2 |
| ; RV32V-NEXT: mv a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 2 |
| ; RV32V-NEXT: add a5, a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 1 |
| ; RV32V-NEXT: add a4, a4, a5 |
| ; RV32V-NEXT: add a4, sp, a4 |
| ; RV32V-NEXT: addi a4, a4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr a4, vlenb |
| ; RV32V-NEXT: slli a4, a4, 2 |
| ; RV32V-NEXT: mv a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 1 |
| ; RV32V-NEXT: add a5, a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 3 |
| ; RV32V-NEXT: add a4, a4, a5 |
| ; RV32V-NEXT: add a4, sp, a4 |
| ; RV32V-NEXT: addi a4, a4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v16, (a3), zero |
| ; RV32V-NEXT: vlse64.v v20, (a2), zero |
| ; RV32V-NEXT: vlse64.v v24, (s3), zero |
| ; RV32V-NEXT: vlse64.v v28, (s2), zero |
| ; RV32V-NEXT: vand.vv v0, v12, v16 |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 4 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v16, (s4), zero |
| ; RV32V-NEXT: vlse64.v v20, (s6), zero |
| ; RV32V-NEXT: vlse64.v v24, (s8), zero |
| ; RV32V-NEXT: vlse64.v v28, (a0), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v12, v24 |
| ; RV32V-NEXT: vand.vv v20, v12, v28 |
| ; RV32V-NEXT: vand.vx v12, v12, a1 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vxor.vv v12, v16, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v16, v8, v4 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v16, v8, v0 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v16, v8, v24 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v8, v8, v20 |
| ; RV32V-NEXT: vxor.vv v8, v12, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i64_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV64V-NEXT: vand.vi v16, v12, 2 |
| ; RV64V-NEXT: vand.vi v20, v12, 1 |
| ; RV64V-NEXT: vmul.vv v16, v8, v16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v20, v16 |
| ; RV64V-NEXT: vand.vi v20, v12, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vi v20, v12, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: li a1, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: li a1, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: li a1, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: li a2, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a2 |
| ; RV64V-NEXT: slli a1, a0, 11 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 2 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 4 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 8 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 16 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 64 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 128 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 256 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 512 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 1024 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 2048 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 4096 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 8192 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 16384 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 32768 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 65536 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 131072 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: lui a1, 262144 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 31 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 32 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 33 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 34 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 35 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 36 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 37 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 38 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 39 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 40 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 41 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 42 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 43 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 44 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 45 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 46 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 47 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 48 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 49 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 50 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 51 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 52 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 53 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 54 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 55 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 56 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 57 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 58 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 59 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 60 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: slli a1, a0, 61 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a1 |
| ; RV64V-NEXT: li a1, -1 |
| ; RV64V-NEXT: slli a0, a0, 62 |
| ; RV64V-NEXT: slli a1, a1, 63 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vand.vx v20, v12, a0 |
| ; RV64V-NEXT: vand.vx v12, v12, a1 |
| ; RV64V-NEXT: vmul.vv v20, v8, v20 |
| ; RV64V-NEXT: vxor.vv v16, v16, v20 |
| ; RV64V-NEXT: vmul.vv v8, v8, v12 |
| ; RV64V-NEXT: vxor.vv v8, v16, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i64_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v12 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i64_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v8, v12 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i64_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lui a1, 524288 |
| ; RV32ZVBC32-NEXT: li s5, 1 |
| ; RV32ZVBC32-NEXT: li a3, 2 |
| ; RV32ZVBC32-NEXT: li a2, 4 |
| ; RV32ZVBC32-NEXT: li a0, 8 |
| ; RV32ZVBC32-NEXT: li s3, 16 |
| ; RV32ZVBC32-NEXT: li s2, 32 |
| ; RV32ZVBC32-NEXT: li s4, 64 |
| ; RV32ZVBC32-NEXT: li s6, 128 |
| ; RV32ZVBC32-NEXT: li s8, 256 |
| ; RV32ZVBC32-NEXT: li s0, 512 |
| ; RV32ZVBC32-NEXT: li s7, 1024 |
| ; RV32ZVBC32-NEXT: lui ra, 1 |
| ; RV32ZVBC32-NEXT: lui s11, 2 |
| ; RV32ZVBC32-NEXT: lui s10, 4 |
| ; RV32ZVBC32-NEXT: lui s9, 8 |
| ; RV32ZVBC32-NEXT: lui s1, 16 |
| ; RV32ZVBC32-NEXT: lui t6, 32 |
| ; RV32ZVBC32-NEXT: lui t5, 64 |
| ; RV32ZVBC32-NEXT: lui t4, 128 |
| ; RV32ZVBC32-NEXT: lui t3, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a6, 8192 |
| ; RV32ZVBC32-NEXT: lui a5, 16384 |
| ; RV32ZVBC32-NEXT: lui a4, 32768 |
| ; RV32ZVBC32-NEXT: sw a1, 16(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a3, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 252(sp) |
| ; RV32ZVBC32-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV32ZVBC32-NEXT: vand.vi v28, v12, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v4, v12, 1 |
| ; RV32ZVBC32-NEXT: vand.vi v24, v12, 4 |
| ; RV32ZVBC32-NEXT: vand.vi v20, v12, 8 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 244(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v16, v12, s3 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s3, sp, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 236(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, s2 |
| ; RV32ZVBC32-NEXT: addi s2, sp, 272 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 228(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v28 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v28, v28, v16 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v12, s4 |
| ; RV32ZVBC32-NEXT: addi s4, sp, 264 |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 220(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v28, v28, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v12, s6 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 212(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v28, v20 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v12, s8 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 248 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 204(sp) |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v4 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v12, s0 |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 196(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, s7 |
| ; RV32ZVBC32-NEXT: slli a0, s5, 11 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v20, v16 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v12, ra |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 180(sp) |
| ; RV32ZVBC32-NEXT: addi s5, sp, 224 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v20, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v20, v12, s11 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 172(sp) |
| ; RV32ZVBC32-NEXT: addi s11, sp, 216 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v28, v24, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v12, s10 |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 164(sp) |
| ; RV32ZVBC32-NEXT: addi s10, sp, 208 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v28, v4 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v12, s9 |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 156(sp) |
| ; RV32ZVBC32-NEXT: addi s9, sp, 200 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi ra, sp, 192 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v12, s1 |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 148(sp) |
| ; RV32ZVBC32-NEXT: addi s1, sp, 184 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v16 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v12, t6 |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 140(sp) |
| ; RV32ZVBC32-NEXT: addi s0, sp, 176 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v20 |
| ; RV32ZVBC32-NEXT: vand.vx v20, v12, t5 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 132(sp) |
| ; RV32ZVBC32-NEXT: addi t6, sp, 168 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v12, t4 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 124(sp) |
| ; RV32ZVBC32-NEXT: addi t5, sp, 160 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v12, t3 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 116(sp) |
| ; RV32ZVBC32-NEXT: addi t4, sp, 152 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v4 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v12, t2 |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: addi t3, sp, 144 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v0, v16 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, t1 |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: addi t2, sp, 136 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v16, v20 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v12, t0 |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: addi t1, sp, 128 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v20, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v20, v12, a7 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: addi t0, sp, 120 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v12, a6 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 76(sp) |
| ; RV32ZVBC32-NEXT: addi a7, sp, 112 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v12, a5 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 68(sp) |
| ; RV32ZVBC32-NEXT: addi a6, sp, 104 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v28, v24, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v12, a4 |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 60(sp) |
| ; RV32ZVBC32-NEXT: addi a5, sp, 96 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v28, v16 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v12, a3 |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 52(sp) |
| ; RV32ZVBC32-NEXT: addi a4, sp, 88 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v12, a2 |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 44(sp) |
| ; RV32ZVBC32-NEXT: addi a3, sp, 80 |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 262144 |
| ; RV32ZVBC32-NEXT: sw a1, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 524288 |
| ; RV32ZVBC32-NEXT: sw a0, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a2, sp, 72 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v20, v16, v20 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s3), zero |
| ; RV32ZVBC32-NEXT: addi s3, sp, 64 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v20, v0 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s2), zero |
| ; RV32ZVBC32-NEXT: addi s2, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v24 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s4), zero |
| ; RV32ZVBC32-NEXT: addi s4, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v0, v0, v28 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (s6), zero |
| ; RV32ZVBC32-NEXT: addi s6, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v0, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s8), zero |
| ; RV32ZVBC32-NEXT: addi s8, sp, 32 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a0, sp, 24 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s7), zero |
| ; RV32ZVBC32-NEXT: addi s7, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s7), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (s11), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 4 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: mv s7, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s7 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: mv s7, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 4 |
| ; RV32ZVBC32-NEXT: add s5, s5, s7 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: mv s7, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s7 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s5) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s10), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s9), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (ra), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s1), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr s1, vlenb |
| ; RV32ZVBC32-NEXT: slli s1, s1, 2 |
| ; RV32ZVBC32-NEXT: mv s5, s1 |
| ; RV32ZVBC32-NEXT: slli s1, s1, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, s5 |
| ; RV32ZVBC32-NEXT: add s1, sp, s1 |
| ; RV32ZVBC32-NEXT: addi s1, s1, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr s1, vlenb |
| ; RV32ZVBC32-NEXT: slli s1, s1, 3 |
| ; RV32ZVBC32-NEXT: mv s5, s1 |
| ; RV32ZVBC32-NEXT: slli s1, s1, 2 |
| ; RV32ZVBC32-NEXT: add s1, s1, s5 |
| ; RV32ZVBC32-NEXT: add s1, sp, s1 |
| ; RV32ZVBC32-NEXT: addi s1, s1, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr s1, vlenb |
| ; RV32ZVBC32-NEXT: slli s1, s1, 6 |
| ; RV32ZVBC32-NEXT: add s1, sp, s1 |
| ; RV32ZVBC32-NEXT: addi s1, s1, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr s1, vlenb |
| ; RV32ZVBC32-NEXT: slli s1, s1, 3 |
| ; RV32ZVBC32-NEXT: mv s5, s1 |
| ; RV32ZVBC32-NEXT: slli s1, s1, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s1 |
| ; RV32ZVBC32-NEXT: slli s1, s1, 2 |
| ; RV32ZVBC32-NEXT: add s1, s1, s5 |
| ; RV32ZVBC32-NEXT: add s1, sp, s1 |
| ; RV32ZVBC32-NEXT: addi s1, s1, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s0), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (t6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (t5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t4), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr t4, vlenb |
| ; RV32ZVBC32-NEXT: slli t4, t4, 3 |
| ; RV32ZVBC32-NEXT: add t4, sp, t4 |
| ; RV32ZVBC32-NEXT: addi t4, t4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr t4, vlenb |
| ; RV32ZVBC32-NEXT: slli t4, t4, 2 |
| ; RV32ZVBC32-NEXT: mv t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 3 |
| ; RV32ZVBC32-NEXT: add t4, t4, t5 |
| ; RV32ZVBC32-NEXT: add t4, sp, t4 |
| ; RV32ZVBC32-NEXT: addi t4, t4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr t4, vlenb |
| ; RV32ZVBC32-NEXT: slli t4, t4, 2 |
| ; RV32ZVBC32-NEXT: mv t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t5 |
| ; RV32ZVBC32-NEXT: add t4, sp, t4 |
| ; RV32ZVBC32-NEXT: addi t4, t4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v0 |
| ; RV32ZVBC32-NEXT: csrr t4, vlenb |
| ; RV32ZVBC32-NEXT: slli t4, t4, 2 |
| ; RV32ZVBC32-NEXT: mv t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 2 |
| ; RV32ZVBC32-NEXT: add t4, t4, t5 |
| ; RV32ZVBC32-NEXT: add t4, sp, t4 |
| ; RV32ZVBC32-NEXT: addi t4, t4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (t3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (t2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (t0), zero |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr t0, vlenb |
| ; RV32ZVBC32-NEXT: slli t0, t0, 2 |
| ; RV32ZVBC32-NEXT: add t0, sp, t0 |
| ; RV32ZVBC32-NEXT: addi t0, t0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr t0, vlenb |
| ; RV32ZVBC32-NEXT: slli t0, t0, 5 |
| ; RV32ZVBC32-NEXT: add t0, sp, t0 |
| ; RV32ZVBC32-NEXT: addi t0, t0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v0 |
| ; RV32ZVBC32-NEXT: csrr t0, vlenb |
| ; RV32ZVBC32-NEXT: slli t0, t0, 3 |
| ; RV32ZVBC32-NEXT: mv t1, t0 |
| ; RV32ZVBC32-NEXT: slli t0, t0, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, t0 |
| ; RV32ZVBC32-NEXT: slli t0, t0, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, t1 |
| ; RV32ZVBC32-NEXT: add t0, sp, t0 |
| ; RV32ZVBC32-NEXT: addi t0, t0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr t0, vlenb |
| ; RV32ZVBC32-NEXT: slli t0, t0, 4 |
| ; RV32ZVBC32-NEXT: mv t1, t0 |
| ; RV32ZVBC32-NEXT: slli t0, t0, 2 |
| ; RV32ZVBC32-NEXT: add t0, t0, t1 |
| ; RV32ZVBC32-NEXT: add t0, sp, t0 |
| ; RV32ZVBC32-NEXT: addi t0, t0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (t0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a7), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (a6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a4), zero |
| ; RV32ZVBC32-NEXT: vand.vv v4, v12, v16 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v0 |
| ; RV32ZVBC32-NEXT: csrr a4, vlenb |
| ; RV32ZVBC32-NEXT: slli a4, a4, 2 |
| ; RV32ZVBC32-NEXT: mv a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 1 |
| ; RV32ZVBC32-NEXT: add a5, a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a5 |
| ; RV32ZVBC32-NEXT: add a4, sp, a4 |
| ; RV32ZVBC32-NEXT: addi a4, a4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a4, vlenb |
| ; RV32ZVBC32-NEXT: slli a4, a4, 2 |
| ; RV32ZVBC32-NEXT: mv a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 2 |
| ; RV32ZVBC32-NEXT: add a5, a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a5 |
| ; RV32ZVBC32-NEXT: add a4, sp, a4 |
| ; RV32ZVBC32-NEXT: addi a4, a4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr a4, vlenb |
| ; RV32ZVBC32-NEXT: slli a4, a4, 2 |
| ; RV32ZVBC32-NEXT: mv a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 1 |
| ; RV32ZVBC32-NEXT: add a5, a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 3 |
| ; RV32ZVBC32-NEXT: add a4, a4, a5 |
| ; RV32ZVBC32-NEXT: add a4, sp, a4 |
| ; RV32ZVBC32-NEXT: addi a4, a4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (s2), zero |
| ; RV32ZVBC32-NEXT: vand.vv v0, v12, v16 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 4 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a0), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v12, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v28 |
| ; RV32ZVBC32-NEXT: vand.vx v12, v12, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v16, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v12, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i64_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e64, m4, ta, ma |
| ; RV64ZVBC32-NEXT: vand.vi v16, v12, 2 |
| ; RV64ZVBC32-NEXT: vand.vi v20, v12, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v20, v16 |
| ; RV64ZVBC32-NEXT: vand.vi v20, v12, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vi v20, v12, 8 |
| ; RV64ZVBC32-NEXT: li a0, 16 |
| ; RV64ZVBC32-NEXT: li a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC32-NEXT: li a0, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: li a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC32-NEXT: li a0, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: li a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC32-NEXT: li a2, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: li a0, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a2 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 11 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2048 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4096 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8192 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16384 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32768 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 65536 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 131072 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 262144 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 31 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 33 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 34 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 35 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 36 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 37 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 38 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 39 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 40 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 41 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 42 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 43 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 44 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 45 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 46 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 47 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 48 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 49 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 50 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 51 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 52 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 53 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 54 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 55 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 56 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 57 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 58 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 59 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 60 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 61 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a1 |
| ; RV64ZVBC32-NEXT: li a1, -1 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 62 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 63 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vand.vx v20, v12, a0 |
| ; RV64ZVBC32-NEXT: vand.vx v12, v12, a1 |
| ; RV64ZVBC32-NEXT: vmul.vv v20, v8, v20 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v20 |
| ; RV64ZVBC32-NEXT: vmul.vv v8, v8, v12 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v16, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 4 x i64> @llvm.clmul.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) |
| ret <vscale x 4 x i64> %v |
| } |
| |
| define <vscale x 4 x i64> @clmul_nxv4i64_vx(<vscale x 4 x i64> %va, i64 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv4i64_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a3, a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a3, a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: sub sp, sp, a2 |
| ; RV32V-NEXT: sw a0, 8(sp) |
| ; RV32V-NEXT: sw a1, 12(sp) |
| ; RV32V-NEXT: addi a1, sp, 8 |
| ; RV32V-NEXT: lui a2, 524288 |
| ; RV32V-NEXT: li s11, 1 |
| ; RV32V-NEXT: li a0, 2 |
| ; RV32V-NEXT: li s9, 4 |
| ; RV32V-NEXT: li s10, 8 |
| ; RV32V-NEXT: li ra, 64 |
| ; RV32V-NEXT: li s8, 256 |
| ; RV32V-NEXT: li s7, 512 |
| ; RV32V-NEXT: li s6, 1024 |
| ; RV32V-NEXT: lui s5, 1 |
| ; RV32V-NEXT: lui s4, 2 |
| ; RV32V-NEXT: lui s3, 4 |
| ; RV32V-NEXT: lui s2, 8 |
| ; RV32V-NEXT: lui s1, 16 |
| ; RV32V-NEXT: lui s0, 32 |
| ; RV32V-NEXT: lui t6, 64 |
| ; RV32V-NEXT: lui t5, 128 |
| ; RV32V-NEXT: lui t4, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a6, 8192 |
| ; RV32V-NEXT: lui a5, 16384 |
| ; RV32V-NEXT: vsetvli a4, zero, e64, m4, ta, ma |
| ; RV32V-NEXT: vlse64.v v12, (a1), zero |
| ; RV32V-NEXT: lui a3, 32768 |
| ; RV32V-NEXT: sw a2, 16(sp) |
| ; RV32V-NEXT: lui t3, 524288 |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s11, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a0, 268(sp) |
| ; RV32V-NEXT: lui a2, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw s9, 260(sp) |
| ; RV32V-NEXT: lui a1, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s10, 252(sp) |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: li a4, 16 |
| ; RV32V-NEXT: sw a4, 244(sp) |
| ; RV32V-NEXT: li s10, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: li a4, 32 |
| ; RV32V-NEXT: sw a4, 236(sp) |
| ; RV32V-NEXT: li a4, 32 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw ra, 228(sp) |
| ; RV32V-NEXT: li ra, 64 |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: li s9, 128 |
| ; RV32V-NEXT: sw s9, 220(sp) |
| ; RV32V-NEXT: li s9, 128 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s8, 212(sp) |
| ; RV32V-NEXT: li s8, 256 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s7, 204(sp) |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s6, 196(sp) |
| ; RV32V-NEXT: slli s11, s11, 11 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw s11, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s5, 180(sp) |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s4, 172(sp) |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s3, 164(sp) |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s2, 156(sp) |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s1, 148(sp) |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw s0, 140(sp) |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t6, 132(sp) |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t5, 124(sp) |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t4, 116(sp) |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a6, 76(sp) |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a5, 68(sp) |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a3, 60(sp) |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a2, 52(sp) |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a1, 44(sp) |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: sw a0, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw t3, 28(sp) |
| ; RV32V-NEXT: addi a0, sp, 16 |
| ; RV32V-NEXT: vlse64.v v16, (a0), zero |
| ; RV32V-NEXT: addi a0, sp, 272 |
| ; RV32V-NEXT: vlse64.v v28, (a0), zero |
| ; RV32V-NEXT: addi a0, sp, 264 |
| ; RV32V-NEXT: vlse64.v v20, (a0), zero |
| ; RV32V-NEXT: addi a0, sp, 256 |
| ; RV32V-NEXT: vlse64.v v24, (a0), zero |
| ; RV32V-NEXT: vand.vi v4, v12, 2 |
| ; RV32V-NEXT: vand.vi v0, v12, 1 |
| ; RV32V-NEXT: vmul.vv v4, v8, v4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v0, v4 |
| ; RV32V-NEXT: vand.vi v0, v12, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vi v0, v12, 8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, s10 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, a4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, ra |
| ; RV32V-NEXT: addi s10, sp, 224 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, s9 |
| ; RV32V-NEXT: addi s7, sp, 216 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, s8 |
| ; RV32V-NEXT: addi s6, sp, 208 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: li a0, 512 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi s5, sp, 200 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: li a0, 1024 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi s4, sp, 192 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, s11 |
| ; RV32V-NEXT: addi s11, sp, 184 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 1 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi s3, sp, 176 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 2 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi s2, sp, 168 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 4 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi s1, sp, 160 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 8 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi s0, sp, 152 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 16 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t6, sp, 144 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 32 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t5, sp, 136 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 64 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t4, sp, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 128 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t3, sp, 120 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 256 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t2, sp, 112 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 512 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t1, sp, 104 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 1024 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi t0, sp, 96 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 2048 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi a7, sp, 88 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 4096 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi a6, sp, 80 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 8192 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi a5, sp, 72 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi a4, sp, 64 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, a3 |
| ; RV32V-NEXT: addi a2, sp, 56 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vand.vx v0, v12, a0 |
| ; RV32V-NEXT: addi a3, sp, 48 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: vand.vx v0, v12, a1 |
| ; RV32V-NEXT: addi a1, sp, 40 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v4, v4, v0 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: addi a0, sp, 248 |
| ; RV32V-NEXT: vlse64.v v4, (a0), zero |
| ; RV32V-NEXT: addi ra, sp, 32 |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v4 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add s8, s8, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, s8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: addi a0, sp, 24 |
| ; RV32V-NEXT: addi s8, sp, 240 |
| ; RV32V-NEXT: vlse64.v v16, (s8), zero |
| ; RV32V-NEXT: addi s8, sp, 232 |
| ; RV32V-NEXT: vlse64.v v20, (s8), zero |
| ; RV32V-NEXT: vlse64.v v24, (s10), zero |
| ; RV32V-NEXT: vlse64.v v28, (s7), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 4 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: mv s8, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s8, s8, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s7, s7, s8 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: mv s8, s7 |
| ; RV32V-NEXT: slli s7, s7, 4 |
| ; RV32V-NEXT: add s7, s7, s8 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: mv s8, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s8, s8, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s8, s8, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s7, s7, s8 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v20, (s6), zero |
| ; RV32V-NEXT: vlse64.v v24, (s5), zero |
| ; RV32V-NEXT: vlse64.v v28, (s4), zero |
| ; RV32V-NEXT: vlse64.v v4, (s11), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr s4, vlenb |
| ; RV32V-NEXT: slli s4, s4, 2 |
| ; RV32V-NEXT: mv s5, s4 |
| ; RV32V-NEXT: slli s4, s4, 1 |
| ; RV32V-NEXT: add s4, s4, s5 |
| ; RV32V-NEXT: add s4, sp, s4 |
| ; RV32V-NEXT: addi s4, s4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr s4, vlenb |
| ; RV32V-NEXT: slli s4, s4, 3 |
| ; RV32V-NEXT: mv s5, s4 |
| ; RV32V-NEXT: slli s4, s4, 2 |
| ; RV32V-NEXT: add s4, s4, s5 |
| ; RV32V-NEXT: add s4, sp, s4 |
| ; RV32V-NEXT: addi s4, s4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr s4, vlenb |
| ; RV32V-NEXT: slli s4, s4, 6 |
| ; RV32V-NEXT: add s4, sp, s4 |
| ; RV32V-NEXT: addi s4, s4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v4 |
| ; RV32V-NEXT: csrr s4, vlenb |
| ; RV32V-NEXT: slli s4, s4, 3 |
| ; RV32V-NEXT: mv s5, s4 |
| ; RV32V-NEXT: slli s4, s4, 1 |
| ; RV32V-NEXT: add s5, s5, s4 |
| ; RV32V-NEXT: slli s4, s4, 2 |
| ; RV32V-NEXT: add s4, s4, s5 |
| ; RV32V-NEXT: add s4, sp, s4 |
| ; RV32V-NEXT: addi s4, s4, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s3), zero |
| ; RV32V-NEXT: vlse64.v v28, (s2), zero |
| ; RV32V-NEXT: vlse64.v v4, (s1), zero |
| ; RV32V-NEXT: vlse64.v v0, (s0), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr s0, vlenb |
| ; RV32V-NEXT: slli s0, s0, 3 |
| ; RV32V-NEXT: add s0, sp, s0 |
| ; RV32V-NEXT: addi s0, s0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr s0, vlenb |
| ; RV32V-NEXT: slli s0, s0, 2 |
| ; RV32V-NEXT: mv s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 3 |
| ; RV32V-NEXT: add s0, s0, s1 |
| ; RV32V-NEXT: add s0, sp, s0 |
| ; RV32V-NEXT: addi s0, s0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v4 |
| ; RV32V-NEXT: csrr s0, vlenb |
| ; RV32V-NEXT: slli s0, s0, 2 |
| ; RV32V-NEXT: mv s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 1 |
| ; RV32V-NEXT: add s1, s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 1 |
| ; RV32V-NEXT: add s1, s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 1 |
| ; RV32V-NEXT: add s0, s0, s1 |
| ; RV32V-NEXT: add s0, sp, s0 |
| ; RV32V-NEXT: addi s0, s0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v0 |
| ; RV32V-NEXT: csrr s0, vlenb |
| ; RV32V-NEXT: slli s0, s0, 2 |
| ; RV32V-NEXT: mv s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 2 |
| ; RV32V-NEXT: add s1, s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 2 |
| ; RV32V-NEXT: add s0, s0, s1 |
| ; RV32V-NEXT: add s0, sp, s0 |
| ; RV32V-NEXT: addi s0, s0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v28, (t6), zero |
| ; RV32V-NEXT: vlse64.v v4, (t5), zero |
| ; RV32V-NEXT: vlse64.v v0, (t4), zero |
| ; RV32V-NEXT: vlse64.v v16, (t3), zero |
| ; RV32V-NEXT: vand.vv v20, v12, v28 |
| ; RV32V-NEXT: csrr t3, vlenb |
| ; RV32V-NEXT: slli t3, t3, 2 |
| ; RV32V-NEXT: add t3, sp, t3 |
| ; RV32V-NEXT: addi t3, t3, 288 |
| ; RV32V-NEXT: vs4r.v v20, (t3) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v20, v12, v4 |
| ; RV32V-NEXT: csrr t3, vlenb |
| ; RV32V-NEXT: slli t3, t3, 5 |
| ; RV32V-NEXT: add t3, sp, t3 |
| ; RV32V-NEXT: addi t3, t3, 288 |
| ; RV32V-NEXT: vs4r.v v20, (t3) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v20, v12, v0 |
| ; RV32V-NEXT: csrr t3, vlenb |
| ; RV32V-NEXT: slli t3, t3, 3 |
| ; RV32V-NEXT: mv t4, t3 |
| ; RV32V-NEXT: slli t3, t3, 1 |
| ; RV32V-NEXT: add t4, t4, t3 |
| ; RV32V-NEXT: slli t3, t3, 1 |
| ; RV32V-NEXT: add t3, t3, t4 |
| ; RV32V-NEXT: add t3, sp, t3 |
| ; RV32V-NEXT: addi t3, t3, 288 |
| ; RV32V-NEXT: vs4r.v v20, (t3) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: csrr t3, vlenb |
| ; RV32V-NEXT: slli t3, t3, 4 |
| ; RV32V-NEXT: mv t4, t3 |
| ; RV32V-NEXT: slli t3, t3, 2 |
| ; RV32V-NEXT: add t3, t3, t4 |
| ; RV32V-NEXT: add t3, sp, t3 |
| ; RV32V-NEXT: addi t3, t3, 288 |
| ; RV32V-NEXT: vs4r.v v16, (t3) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v16, (t2), zero |
| ; RV32V-NEXT: vlse64.v v0, (t1), zero |
| ; RV32V-NEXT: vlse64.v v20, (t0), zero |
| ; RV32V-NEXT: vlse64.v v24, (a7), zero |
| ; RV32V-NEXT: vand.vv v4, v12, v16 |
| ; RV32V-NEXT: vand.vv v16, v12, v0 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a7) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v16, (a6), zero |
| ; RV32V-NEXT: vlse64.v v20, (a5), zero |
| ; RV32V-NEXT: vlse64.v v24, (a4), zero |
| ; RV32V-NEXT: vlse64.v v28, (a2), zero |
| ; RV32V-NEXT: vand.vv v0, v12, v16 |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: mv a4, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a2, a2, a4 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v24 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 4 |
| ; RV32V-NEXT: mv a4, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a2, a2, a4 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v28 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: mv a4, a2 |
| ; RV32V-NEXT: slli a2, a2, 3 |
| ; RV32V-NEXT: add a2, a2, a4 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v16, (a3), zero |
| ; RV32V-NEXT: vlse64.v v20, (a1), zero |
| ; RV32V-NEXT: vlse64.v v24, (ra), zero |
| ; RV32V-NEXT: vlse64.v v28, (a0), zero |
| ; RV32V-NEXT: vand.vv v16, v12, v16 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v16, v12, v20 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v12, v24 |
| ; RV32V-NEXT: vand.vv v20, v12, v28 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vand.vx v12, v12, a0 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vxor.vv v12, v16, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v16, v8, v4 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v16, v8, v0 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v16, v8, v24 |
| ; RV32V-NEXT: vxor.vv v12, v12, v16 |
| ; RV32V-NEXT: vmul.vv v8, v8, v20 |
| ; RV32V-NEXT: vxor.vv v8, v12, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv4i64_vx: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: andi a1, a0, 2 |
| ; RV64V-NEXT: andi a2, a0, 1 |
| ; RV64V-NEXT: vsetvli a3, zero, e64, m4, ta, ma |
| ; RV64V-NEXT: vmul.vx v12, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 4 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 8 |
| ; RV64V-NEXT: vxor.vv v12, v16, v12 |
| ; RV64V-NEXT: vmul.vx v16, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 16 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 32 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 64 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 128 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 256 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 512 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a1 |
| ; RV64V-NEXT: andi a3, a0, 1024 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: li a1, 1 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a3 |
| ; RV64V-NEXT: slli a2, a1, 11 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 1 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 2 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 4 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 8 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 16 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 64 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 128 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 256 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 512 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 1024 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 2048 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 4096 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 8192 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 16384 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 32768 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 65536 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 131072 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: lui a2, 262144 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: srliw a2, a0, 31 |
| ; RV64V-NEXT: slli a2, a2, 31 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 33 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 34 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 35 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 36 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 37 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 38 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 39 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 40 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 41 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 42 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 43 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 44 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 45 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 46 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 47 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 48 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 49 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 50 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 51 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 52 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 53 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 54 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 55 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 56 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 57 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 58 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 59 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 60 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 61 |
| ; RV64V-NEXT: slli a1, a1, 62 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: and a1, a0, a1 |
| ; RV64V-NEXT: srli a0, a0, 63 |
| ; RV64V-NEXT: slli a0, a0, 63 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a2 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v16, v8, a1 |
| ; RV64V-NEXT: vxor.vv v12, v12, v16 |
| ; RV64V-NEXT: vmul.vx v8, v8, a0 |
| ; RV64V-NEXT: vxor.vv v8, v12, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv4i64_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: addi sp, sp, -16 |
| ; RV32ZVBC64-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC64-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC64-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e64, m4, ta, ma |
| ; RV32ZVBC64-NEXT: vlse64.v v12, (a0), zero |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v12 |
| ; RV32ZVBC64-NEXT: addi sp, sp, 16 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv4i64_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e64, m4, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv4i64_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a3, a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a3, a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a2 |
| ; RV32ZVBC32-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC32-NEXT: addi a1, sp, 8 |
| ; RV32ZVBC32-NEXT: lui a2, 524288 |
| ; RV32ZVBC32-NEXT: li s11, 1 |
| ; RV32ZVBC32-NEXT: li a0, 2 |
| ; RV32ZVBC32-NEXT: li s9, 4 |
| ; RV32ZVBC32-NEXT: li s10, 8 |
| ; RV32ZVBC32-NEXT: li ra, 64 |
| ; RV32ZVBC32-NEXT: li s8, 256 |
| ; RV32ZVBC32-NEXT: li s7, 512 |
| ; RV32ZVBC32-NEXT: li s6, 1024 |
| ; RV32ZVBC32-NEXT: lui s5, 1 |
| ; RV32ZVBC32-NEXT: lui s4, 2 |
| ; RV32ZVBC32-NEXT: lui s3, 4 |
| ; RV32ZVBC32-NEXT: lui s2, 8 |
| ; RV32ZVBC32-NEXT: lui s1, 16 |
| ; RV32ZVBC32-NEXT: lui s0, 32 |
| ; RV32ZVBC32-NEXT: lui t6, 64 |
| ; RV32ZVBC32-NEXT: lui t5, 128 |
| ; RV32ZVBC32-NEXT: lui t4, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a6, 8192 |
| ; RV32ZVBC32-NEXT: lui a5, 16384 |
| ; RV32ZVBC32-NEXT: vsetvli a4, zero, e64, m4, ta, ma |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a1), zero |
| ; RV32ZVBC32-NEXT: lui a3, 32768 |
| ; RV32ZVBC32-NEXT: sw a2, 16(sp) |
| ; RV32ZVBC32-NEXT: lui t3, 524288 |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 252(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: li a4, 16 |
| ; RV32ZVBC32-NEXT: sw a4, 244(sp) |
| ; RV32ZVBC32-NEXT: li s10, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: li a4, 32 |
| ; RV32ZVBC32-NEXT: sw a4, 236(sp) |
| ; RV32ZVBC32-NEXT: li a4, 32 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 228(sp) |
| ; RV32ZVBC32-NEXT: li ra, 64 |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: li s9, 128 |
| ; RV32ZVBC32-NEXT: sw s9, 220(sp) |
| ; RV32ZVBC32-NEXT: li s9, 128 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 212(sp) |
| ; RV32ZVBC32-NEXT: li s8, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 204(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 196(sp) |
| ; RV32ZVBC32-NEXT: slli s11, s11, 11 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 180(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 172(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 164(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 156(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 148(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 140(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 132(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 124(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 116(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 76(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 68(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 60(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 52(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 44(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a0, sp, 16 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a0), zero |
| ; RV32ZVBC32-NEXT: addi a0, sp, 272 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a0), zero |
| ; RV32ZVBC32-NEXT: addi a0, sp, 264 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a0), zero |
| ; RV32ZVBC32-NEXT: addi a0, sp, 256 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a0), zero |
| ; RV32ZVBC32-NEXT: vand.vi v4, v12, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v12, 1 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v0, v4 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v12, 4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v12, 8 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, s10 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, ra |
| ; RV32ZVBC32-NEXT: addi s10, sp, 224 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, s9 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 216 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, s8 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 208 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: li a0, 512 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi s5, sp, 200 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: li a0, 1024 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi s4, sp, 192 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, s11 |
| ; RV32ZVBC32-NEXT: addi s11, sp, 184 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 1 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi s3, sp, 176 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 2 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi s2, sp, 168 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 4 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi s1, sp, 160 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 8 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi s0, sp, 152 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 16 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t6, sp, 144 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 32 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t5, sp, 136 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 64 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t4, sp, 128 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t3, sp, 120 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 256 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t2, sp, 112 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 512 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t1, sp, 104 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 1024 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi t0, sp, 96 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 2048 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi a7, sp, 88 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 4096 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi a6, sp, 80 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 8192 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi a5, sp, 72 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 16384 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi a4, sp, 64 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a3 |
| ; RV32ZVBC32-NEXT: addi a2, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 65536 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a0 |
| ; RV32ZVBC32-NEXT: addi a3, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v12, a1 |
| ; RV32ZVBC32-NEXT: addi a1, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v4, v4, v0 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a0, sp, 248 |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a0), zero |
| ; RV32ZVBC32-NEXT: addi ra, sp, 32 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, s8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a0, sp, 24 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (s8), zero |
| ; RV32ZVBC32-NEXT: addi s8, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s10), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (s7), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 4 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: mv s8, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, s8 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: mv s8, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 4 |
| ; RV32ZVBC32-NEXT: add s7, s7, s8 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: mv s8, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, s8 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (s4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s11), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr s4, vlenb |
| ; RV32ZVBC32-NEXT: slli s4, s4, 2 |
| ; RV32ZVBC32-NEXT: mv s5, s4 |
| ; RV32ZVBC32-NEXT: slli s4, s4, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, s5 |
| ; RV32ZVBC32-NEXT: add s4, sp, s4 |
| ; RV32ZVBC32-NEXT: addi s4, s4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr s4, vlenb |
| ; RV32ZVBC32-NEXT: slli s4, s4, 3 |
| ; RV32ZVBC32-NEXT: mv s5, s4 |
| ; RV32ZVBC32-NEXT: slli s4, s4, 2 |
| ; RV32ZVBC32-NEXT: add s4, s4, s5 |
| ; RV32ZVBC32-NEXT: add s4, sp, s4 |
| ; RV32ZVBC32-NEXT: addi s4, s4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr s4, vlenb |
| ; RV32ZVBC32-NEXT: slli s4, s4, 6 |
| ; RV32ZVBC32-NEXT: add s4, sp, s4 |
| ; RV32ZVBC32-NEXT: addi s4, s4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr s4, vlenb |
| ; RV32ZVBC32-NEXT: slli s4, s4, 3 |
| ; RV32ZVBC32-NEXT: mv s5, s4 |
| ; RV32ZVBC32-NEXT: slli s4, s4, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s4 |
| ; RV32ZVBC32-NEXT: slli s4, s4, 2 |
| ; RV32ZVBC32-NEXT: add s4, s4, s5 |
| ; RV32ZVBC32-NEXT: add s4, sp, s4 |
| ; RV32ZVBC32-NEXT: addi s4, s4, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (s2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (s1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s0), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr s0, vlenb |
| ; RV32ZVBC32-NEXT: slli s0, s0, 3 |
| ; RV32ZVBC32-NEXT: add s0, sp, s0 |
| ; RV32ZVBC32-NEXT: addi s0, s0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr s0, vlenb |
| ; RV32ZVBC32-NEXT: slli s0, s0, 2 |
| ; RV32ZVBC32-NEXT: mv s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 3 |
| ; RV32ZVBC32-NEXT: add s0, s0, s1 |
| ; RV32ZVBC32-NEXT: add s0, sp, s0 |
| ; RV32ZVBC32-NEXT: addi s0, s0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr s0, vlenb |
| ; RV32ZVBC32-NEXT: slli s0, s0, 2 |
| ; RV32ZVBC32-NEXT: mv s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, s1 |
| ; RV32ZVBC32-NEXT: add s0, sp, s0 |
| ; RV32ZVBC32-NEXT: addi s0, s0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v0 |
| ; RV32ZVBC32-NEXT: csrr s0, vlenb |
| ; RV32ZVBC32-NEXT: slli s0, s0, 2 |
| ; RV32ZVBC32-NEXT: mv s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 2 |
| ; RV32ZVBC32-NEXT: add s1, s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 2 |
| ; RV32ZVBC32-NEXT: add s0, s0, s1 |
| ; RV32ZVBC32-NEXT: add s0, sp, s0 |
| ; RV32ZVBC32-NEXT: addi s0, s0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (s0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (t6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (t5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (t3), zero |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr t3, vlenb |
| ; RV32ZVBC32-NEXT: slli t3, t3, 2 |
| ; RV32ZVBC32-NEXT: add t3, sp, t3 |
| ; RV32ZVBC32-NEXT: addi t3, t3, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v20, (t3) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v4 |
| ; RV32ZVBC32-NEXT: csrr t3, vlenb |
| ; RV32ZVBC32-NEXT: slli t3, t3, 5 |
| ; RV32ZVBC32-NEXT: add t3, sp, t3 |
| ; RV32ZVBC32-NEXT: addi t3, t3, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v20, (t3) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v0 |
| ; RV32ZVBC32-NEXT: csrr t3, vlenb |
| ; RV32ZVBC32-NEXT: slli t3, t3, 3 |
| ; RV32ZVBC32-NEXT: mv t4, t3 |
| ; RV32ZVBC32-NEXT: slli t3, t3, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t3 |
| ; RV32ZVBC32-NEXT: slli t3, t3, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t4 |
| ; RV32ZVBC32-NEXT: add t3, sp, t3 |
| ; RV32ZVBC32-NEXT: addi t3, t3, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v20, (t3) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr t3, vlenb |
| ; RV32ZVBC32-NEXT: slli t3, t3, 4 |
| ; RV32ZVBC32-NEXT: mv t4, t3 |
| ; RV32ZVBC32-NEXT: slli t3, t3, 2 |
| ; RV32ZVBC32-NEXT: add t3, t3, t4 |
| ; RV32ZVBC32-NEXT: add t3, sp, t3 |
| ; RV32ZVBC32-NEXT: addi t3, t3, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (t3) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (t2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (t0), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a7), zero |
| ; RV32ZVBC32-NEXT: vand.vv v4, v12, v16 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v0 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a7) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a2), zero |
| ; RV32ZVBC32-NEXT: vand.vv v0, v12, v16 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: mv a4, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a4 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v24 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 4 |
| ; RV32ZVBC32-NEXT: mv a4, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a4 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v28 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: mv a4, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 3 |
| ; RV32ZVBC32-NEXT: add a2, a2, a4 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (ra), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a0), zero |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v16 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v16, v12, v20 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v12, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v20, v12, v28 |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: vand.vx v12, v12, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v16, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v12, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv4i64_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: andi a1, a0, 2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 1 |
| ; RV64ZVBC32-NEXT: vsetvli a3, zero, e64, m4, ta, ma |
| ; RV64ZVBC32-NEXT: vmul.vx v12, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 4 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 8 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v16, v12 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 16 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 32 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 64 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 128 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 256 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 512 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a3, a0, 1024 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: li a1, 1 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a3 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 11 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 64 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 128 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 256 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 512 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1024 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2048 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4096 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8192 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16384 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32768 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 65536 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 131072 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 262144 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: srliw a2, a0, 31 |
| ; RV64ZVBC32-NEXT: slli a2, a2, 31 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 33 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 34 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 35 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 36 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 37 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 38 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 39 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 40 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 41 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 42 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 43 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 44 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 45 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 46 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 47 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 48 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 49 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 50 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 51 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 52 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 53 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 54 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 55 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 56 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 57 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 58 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 59 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 60 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 61 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 62 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: and a1, a0, a1 |
| ; RV64ZVBC32-NEXT: srli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a1 |
| ; RV64ZVBC32-NEXT: vxor.vv v12, v12, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v12, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 |
| %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer |
| %v = call <vscale x 4 x i64> @llvm.clmul.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) |
| ret <vscale x 4 x i64> %v |
| } |
| |
| define <vscale x 8 x i64> @clmul_nxv8i64_vv(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i64_vv: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: sub sp, sp, a0 |
| ; RV32V-NEXT: lui a1, 524288 |
| ; RV32V-NEXT: li s5, 1 |
| ; RV32V-NEXT: li a3, 2 |
| ; RV32V-NEXT: li a2, 4 |
| ; RV32V-NEXT: li s10, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: li t6, 32 |
| ; RV32V-NEXT: li s1, 64 |
| ; RV32V-NEXT: li s3, 128 |
| ; RV32V-NEXT: li s7, 256 |
| ; RV32V-NEXT: li s4, 512 |
| ; RV32V-NEXT: li s8, 1024 |
| ; RV32V-NEXT: lui ra, 1 |
| ; RV32V-NEXT: lui s11, 2 |
| ; RV32V-NEXT: lui s9, 4 |
| ; RV32V-NEXT: lui s6, 8 |
| ; RV32V-NEXT: lui s2, 16 |
| ; RV32V-NEXT: lui s0, 32 |
| ; RV32V-NEXT: lui t5, 64 |
| ; RV32V-NEXT: lui t4, 128 |
| ; RV32V-NEXT: lui t3, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a6, 8192 |
| ; RV32V-NEXT: lui a5, 16384 |
| ; RV32V-NEXT: lui a4, 32768 |
| ; RV32V-NEXT: sw a1, 16(sp) |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s5, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a3, 268(sp) |
| ; RV32V-NEXT: lui a3, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw a2, 260(sp) |
| ; RV32V-NEXT: lui a2, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s10, 252(sp) |
| ; RV32V-NEXT: vsetvli s10, zero, e64, m8, ta, ma |
| ; RV32V-NEXT: vand.vi v24, v16, 2 |
| ; RV32V-NEXT: vand.vi v0, v16, 1 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v0, v24 |
| ; RV32V-NEXT: vand.vi v0, v16, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vi v0, v16, 8 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: sw a0, 244(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: addi s10, sp, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: sw t6, 236(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t6 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw s1, 228(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s1 |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s3, 220(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s3 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s7, 212(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s7 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s4, 204(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s4 |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s8, 196(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s8 |
| ; RV32V-NEXT: slli s5, s5, 11 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw s5, 188(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s5 |
| ; RV32V-NEXT: addi s5, sp, 224 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, ra |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw ra, 180(sp) |
| ; RV32V-NEXT: addi ra, sp, 216 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s11 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s11, 172(sp) |
| ; RV32V-NEXT: addi s11, sp, 208 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s9 |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s9, 164(sp) |
| ; RV32V-NEXT: addi s9, sp, 200 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s6 |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s6, 156(sp) |
| ; RV32V-NEXT: addi s6, sp, 192 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s2 |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s2, 148(sp) |
| ; RV32V-NEXT: addi s3, sp, 184 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s0 |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw s0, 140(sp) |
| ; RV32V-NEXT: addi s4, sp, 176 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t5 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t5, 132(sp) |
| ; RV32V-NEXT: addi s2, sp, 168 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t4 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t4, 124(sp) |
| ; RV32V-NEXT: addi s1, sp, 160 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t3 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t3, 116(sp) |
| ; RV32V-NEXT: addi t6, sp, 152 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t2 |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: addi s0, sp, 144 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t1 |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: addi t5, sp, 136 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t0 |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: addi t4, sp, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a7 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: addi t2, sp, 120 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a6 |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a6, 76(sp) |
| ; RV32V-NEXT: addi t3, sp, 112 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a5 |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a5, 68(sp) |
| ; RV32V-NEXT: addi t1, sp, 104 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a4 |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a4, 60(sp) |
| ; RV32V-NEXT: addi t0, sp, 96 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a3 |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a3, 52(sp) |
| ; RV32V-NEXT: addi a7, sp, 88 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a2 |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a2, 44(sp) |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: sw a0, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw a1, 28(sp) |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: sw t2, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 5 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s10), zero |
| ; RV32V-NEXT: addi a6, sp, 80 |
| ; RV32V-NEXT: addi a5, sp, 72 |
| ; RV32V-NEXT: addi a4, sp, 64 |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 8 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi a1, sp, 56 |
| ; RV32V-NEXT: addi s10, sp, 48 |
| ; RV32V-NEXT: addi a3, sp, 40 |
| ; RV32V-NEXT: addi a2, sp, 32 |
| ; RV32V-NEXT: addi s7, sp, 272 |
| ; RV32V-NEXT: vlse64.v v24, (s7), zero |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi s7, sp, 264 |
| ; RV32V-NEXT: vlse64.v v0, (s7), zero |
| ; RV32V-NEXT: addi s7, sp, 256 |
| ; RV32V-NEXT: vlse64.v v24, (s7), zero |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi s7, sp, 248 |
| ; RV32V-NEXT: vlse64.v v24, (s7), zero |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 5 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add s7, s7, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, s7 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi s7, sp, 24 |
| ; RV32V-NEXT: addi s8, sp, 240 |
| ; RV32V-NEXT: vlse64.v v24, (s8), zero |
| ; RV32V-NEXT: csrr s8, vlenb |
| ; RV32V-NEXT: slli s8, s8, 4 |
| ; RV32V-NEXT: mv t2, s8 |
| ; RV32V-NEXT: slli s8, s8, 2 |
| ; RV32V-NEXT: add t2, t2, s8 |
| ; RV32V-NEXT: slli s8, s8, 1 |
| ; RV32V-NEXT: add s8, s8, t2 |
| ; RV32V-NEXT: lw t2, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: add s8, sp, s8 |
| ; RV32V-NEXT: addi s8, s8, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s8) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi s8, sp, 232 |
| ; RV32V-NEXT: vlse64.v v0, (s8), zero |
| ; RV32V-NEXT: vlse64.v v24, (s5), zero |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (ra), zero |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 4 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 4 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (s11), zero |
| ; RV32V-NEXT: vlse64.v v24, (s9), zero |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: add s8, s8, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s8 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s6), zero |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 4 |
| ; RV32V-NEXT: mv s6, s5 |
| ; RV32V-NEXT: slli s5, s5, 2 |
| ; RV32V-NEXT: add s6, s6, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s6 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s3), zero |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 6 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v0, v16, v0 |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 4 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vs8r.v v0, (s3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 3 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 3 |
| ; RV32V-NEXT: add s5, s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 3 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 2 |
| ; RV32V-NEXT: add s5, s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 4 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 2 |
| ; RV32V-NEXT: add s5, s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 5 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 2 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 6 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 4 |
| ; RV32V-NEXT: mv s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 2 |
| ; RV32V-NEXT: add s5, s5, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s5 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s4), zero |
| ; RV32V-NEXT: csrr s3, vlenb |
| ; RV32V-NEXT: slli s3, s3, 6 |
| ; RV32V-NEXT: mv s4, s3 |
| ; RV32V-NEXT: slli s3, s3, 1 |
| ; RV32V-NEXT: add s3, s3, s4 |
| ; RV32V-NEXT: add s3, sp, s3 |
| ; RV32V-NEXT: addi s3, s3, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (s2), zero |
| ; RV32V-NEXT: vlse64.v v24, (s1), zero |
| ; RV32V-NEXT: csrr s1, vlenb |
| ; RV32V-NEXT: slli s1, s1, 3 |
| ; RV32V-NEXT: mv s2, s1 |
| ; RV32V-NEXT: slli s1, s1, 3 |
| ; RV32V-NEXT: add s2, s2, s1 |
| ; RV32V-NEXT: slli s1, s1, 1 |
| ; RV32V-NEXT: add s1, s1, s2 |
| ; RV32V-NEXT: add s1, sp, s1 |
| ; RV32V-NEXT: addi s1, s1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t6), zero |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 2 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 6 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 2 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 5 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 2 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: add s1, s1, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add t6, t6, s1 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s0), zero |
| ; RV32V-NEXT: csrr t6, vlenb |
| ; RV32V-NEXT: slli t6, t6, 3 |
| ; RV32V-NEXT: mv s0, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s0, s0, t6 |
| ; RV32V-NEXT: slli t6, t6, 1 |
| ; RV32V-NEXT: add s0, s0, t6 |
| ; RV32V-NEXT: slli t6, t6, 2 |
| ; RV32V-NEXT: add t6, t6, s0 |
| ; RV32V-NEXT: add t6, sp, t6 |
| ; RV32V-NEXT: addi t6, t6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (t5), zero |
| ; RV32V-NEXT: vlse64.v v24, (t4), zero |
| ; RV32V-NEXT: csrr t4, vlenb |
| ; RV32V-NEXT: slli t4, t4, 6 |
| ; RV32V-NEXT: mv t5, t4 |
| ; RV32V-NEXT: slli t4, t4, 1 |
| ; RV32V-NEXT: add t4, t4, t5 |
| ; RV32V-NEXT: add t4, sp, t4 |
| ; RV32V-NEXT: addi t4, t4, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t4) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t2), zero |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t4, t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t4, t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t4, t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 5 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t4, t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 6 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 3 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t4, t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 6 |
| ; RV32V-NEXT: mv t4, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t2, t2, t4 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t3), zero |
| ; RV32V-NEXT: csrr t2, vlenb |
| ; RV32V-NEXT: slli t2, t2, 4 |
| ; RV32V-NEXT: mv t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 1 |
| ; RV32V-NEXT: add t3, t3, t2 |
| ; RV32V-NEXT: slli t2, t2, 2 |
| ; RV32V-NEXT: add t2, t2, t3 |
| ; RV32V-NEXT: add t2, sp, t2 |
| ; RV32V-NEXT: addi t2, t2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (t1), zero |
| ; RV32V-NEXT: vlse64.v v24, (t0), zero |
| ; RV32V-NEXT: csrr t0, vlenb |
| ; RV32V-NEXT: slli t0, t0, 3 |
| ; RV32V-NEXT: mv t1, t0 |
| ; RV32V-NEXT: slli t0, t0, 1 |
| ; RV32V-NEXT: add t1, t1, t0 |
| ; RV32V-NEXT: slli t0, t0, 1 |
| ; RV32V-NEXT: add t1, t1, t0 |
| ; RV32V-NEXT: slli t0, t0, 2 |
| ; RV32V-NEXT: add t0, t0, t1 |
| ; RV32V-NEXT: add t0, sp, t0 |
| ; RV32V-NEXT: addi t0, t0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a7), zero |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 7 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 4 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 4 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 4 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 7 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a6), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (a5), zero |
| ; RV32V-NEXT: vlse64.v v24, (a4), zero |
| ; RV32V-NEXT: csrr a4, vlenb |
| ; RV32V-NEXT: slli a4, a4, 4 |
| ; RV32V-NEXT: mv a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 1 |
| ; RV32V-NEXT: add a5, a5, a4 |
| ; RV32V-NEXT: slli a4, a4, 2 |
| ; RV32V-NEXT: add a4, a4, a5 |
| ; RV32V-NEXT: add a4, sp, a4 |
| ; RV32V-NEXT: addi a4, a4, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a4) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a1), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a4, a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a4, a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a4 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 7 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 4 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: add a1, a1, a4 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 4 |
| ; RV32V-NEXT: mv a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a4, a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, a1, a4 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 7 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a4, a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a4, a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a4 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 4 |
| ; RV32V-NEXT: mv a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a4, a4, a1 |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, a1, a4 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s10), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 6 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (a3), zero |
| ; RV32V-NEXT: vlse64.v v24, (a2), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s7), zero |
| ; RV32V-NEXT: addi a1, sp, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 6 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 6 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 3 |
| ; RV32V-NEXT: mv a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a2, a2, a1 |
| ; RV32V-NEXT: slli a1, a1, 1 |
| ; RV32V-NEXT: add a1, a1, a2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi a1, sp, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v0, v16, v24 |
| ; RV32V-NEXT: vand.vx v16, v16, a0 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vxor.vv v16, v24, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vv v8, v8, v0 |
| ; RV32V-NEXT: vxor.vv v8, v16, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv8i64_vv: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV64V-NEXT: vand.vi v24, v16, 2 |
| ; RV64V-NEXT: vand.vi v0, v16, 1 |
| ; RV64V-NEXT: vmul.vv v24, v8, v24 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v0, v24 |
| ; RV64V-NEXT: vand.vi v0, v16, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vi v0, v16, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: li a1, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: li a1, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: li a1, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: li a2, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a2 |
| ; RV64V-NEXT: slli a1, a0, 11 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 2 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 4 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 8 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 16 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 64 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 128 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 256 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 512 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 1024 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 2048 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 4096 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 8192 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 16384 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 32768 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 65536 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 131072 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: lui a1, 262144 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 31 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 32 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 33 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 34 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 35 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 36 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 37 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 38 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 39 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 40 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 41 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 42 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 43 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 44 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 45 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 46 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 47 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 48 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 49 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 50 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 51 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 52 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 53 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 54 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 55 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 56 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 57 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 58 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 59 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 60 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: slli a1, a0, 61 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a1 |
| ; RV64V-NEXT: li a1, -1 |
| ; RV64V-NEXT: slli a0, a0, 62 |
| ; RV64V-NEXT: slli a1, a1, 63 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vand.vx v0, v16, a0 |
| ; RV64V-NEXT: vand.vx v16, v16, a1 |
| ; RV64V-NEXT: vmul.vv v0, v8, v0 |
| ; RV64V-NEXT: vxor.vv v24, v24, v0 |
| ; RV64V-NEXT: vmul.vv v8, v8, v16 |
| ; RV64V-NEXT: vxor.vv v8, v24, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv8i64_vv: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v16 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv8i64_vv: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v8, v16 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i64_vv: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lui a1, 524288 |
| ; RV32ZVBC32-NEXT: li s5, 1 |
| ; RV32ZVBC32-NEXT: li a3, 2 |
| ; RV32ZVBC32-NEXT: li a2, 4 |
| ; RV32ZVBC32-NEXT: li s10, 8 |
| ; RV32ZVBC32-NEXT: li a0, 16 |
| ; RV32ZVBC32-NEXT: li t6, 32 |
| ; RV32ZVBC32-NEXT: li s1, 64 |
| ; RV32ZVBC32-NEXT: li s3, 128 |
| ; RV32ZVBC32-NEXT: li s7, 256 |
| ; RV32ZVBC32-NEXT: li s4, 512 |
| ; RV32ZVBC32-NEXT: li s8, 1024 |
| ; RV32ZVBC32-NEXT: lui ra, 1 |
| ; RV32ZVBC32-NEXT: lui s11, 2 |
| ; RV32ZVBC32-NEXT: lui s9, 4 |
| ; RV32ZVBC32-NEXT: lui s6, 8 |
| ; RV32ZVBC32-NEXT: lui s2, 16 |
| ; RV32ZVBC32-NEXT: lui s0, 32 |
| ; RV32ZVBC32-NEXT: lui t5, 64 |
| ; RV32ZVBC32-NEXT: lui t4, 128 |
| ; RV32ZVBC32-NEXT: lui t3, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a6, 8192 |
| ; RV32ZVBC32-NEXT: lui a5, 16384 |
| ; RV32ZVBC32-NEXT: lui a4, 32768 |
| ; RV32ZVBC32-NEXT: sw a1, 16(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a3, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 252(sp) |
| ; RV32ZVBC32-NEXT: vsetvli s10, zero, e64, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vand.vi v24, v16, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v16, 1 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v0, v24 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v16, 4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v16, 8 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 244(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 236(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t6 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 228(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s1 |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 220(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s3 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 212(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s7 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 204(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s4 |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 196(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s8 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 11 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 188(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s5 |
| ; RV32ZVBC32-NEXT: addi s5, sp, 224 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, ra |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 180(sp) |
| ; RV32ZVBC32-NEXT: addi ra, sp, 216 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s11 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 172(sp) |
| ; RV32ZVBC32-NEXT: addi s11, sp, 208 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s9 |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 164(sp) |
| ; RV32ZVBC32-NEXT: addi s9, sp, 200 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s6 |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 156(sp) |
| ; RV32ZVBC32-NEXT: addi s6, sp, 192 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s2 |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 148(sp) |
| ; RV32ZVBC32-NEXT: addi s3, sp, 184 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s0 |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 140(sp) |
| ; RV32ZVBC32-NEXT: addi s4, sp, 176 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t5 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 132(sp) |
| ; RV32ZVBC32-NEXT: addi s2, sp, 168 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t4 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 124(sp) |
| ; RV32ZVBC32-NEXT: addi s1, sp, 160 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t3 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 116(sp) |
| ; RV32ZVBC32-NEXT: addi t6, sp, 152 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t2 |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: addi s0, sp, 144 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t1 |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: addi t5, sp, 136 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t0 |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: addi t4, sp, 128 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a7 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: addi t2, sp, 120 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a6 |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 76(sp) |
| ; RV32ZVBC32-NEXT: addi t3, sp, 112 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a5 |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 68(sp) |
| ; RV32ZVBC32-NEXT: addi t1, sp, 104 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a4 |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 60(sp) |
| ; RV32ZVBC32-NEXT: addi t0, sp, 96 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a3 |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 52(sp) |
| ; RV32ZVBC32-NEXT: addi a7, sp, 88 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a2 |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 44(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: sw a0, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 28(sp) |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: sw t2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 5 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s10), zero |
| ; RV32ZVBC32-NEXT: addi a6, sp, 80 |
| ; RV32ZVBC32-NEXT: addi a5, sp, 72 |
| ; RV32ZVBC32-NEXT: addi a4, sp, 64 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 8 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a1, sp, 56 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 48 |
| ; RV32ZVBC32-NEXT: addi a3, sp, 40 |
| ; RV32ZVBC32-NEXT: addi a2, sp, 32 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 272 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s7), zero |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s7, sp, 264 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s7), zero |
| ; RV32ZVBC32-NEXT: addi s7, sp, 256 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s7), zero |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s7, sp, 248 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s7), zero |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 5 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, s7 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s7, sp, 24 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s8), zero |
| ; RV32ZVBC32-NEXT: csrr s8, vlenb |
| ; RV32ZVBC32-NEXT: slli s8, s8, 4 |
| ; RV32ZVBC32-NEXT: mv t2, s8 |
| ; RV32ZVBC32-NEXT: slli s8, s8, 2 |
| ; RV32ZVBC32-NEXT: add t2, t2, s8 |
| ; RV32ZVBC32-NEXT: slli s8, s8, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t2 |
| ; RV32ZVBC32-NEXT: lw t2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: add s8, sp, s8 |
| ; RV32ZVBC32-NEXT: addi s8, s8, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s8) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s8, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s5), zero |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (ra), zero |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 4 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 4 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s11), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s9), zero |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: add s8, s8, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s8 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s6), zero |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 4 |
| ; RV32ZVBC32-NEXT: mv s6, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 2 |
| ; RV32ZVBC32-NEXT: add s6, s6, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s6 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s3), zero |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 6 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v0, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 4 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v0, (s3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 3 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 3 |
| ; RV32ZVBC32-NEXT: add s5, s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 3 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 4 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 5 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 2 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 6 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 4 |
| ; RV32ZVBC32-NEXT: mv s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s5 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s4), zero |
| ; RV32ZVBC32-NEXT: csrr s3, vlenb |
| ; RV32ZVBC32-NEXT: slli s3, s3, 6 |
| ; RV32ZVBC32-NEXT: mv s4, s3 |
| ; RV32ZVBC32-NEXT: slli s3, s3, 1 |
| ; RV32ZVBC32-NEXT: add s3, s3, s4 |
| ; RV32ZVBC32-NEXT: add s3, sp, s3 |
| ; RV32ZVBC32-NEXT: addi s3, s3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s2), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s1), zero |
| ; RV32ZVBC32-NEXT: csrr s1, vlenb |
| ; RV32ZVBC32-NEXT: slli s1, s1, 3 |
| ; RV32ZVBC32-NEXT: mv s2, s1 |
| ; RV32ZVBC32-NEXT: slli s1, s1, 3 |
| ; RV32ZVBC32-NEXT: add s2, s2, s1 |
| ; RV32ZVBC32-NEXT: slli s1, s1, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, s2 |
| ; RV32ZVBC32-NEXT: add s1, sp, s1 |
| ; RV32ZVBC32-NEXT: addi s1, s1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t6), zero |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 2 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 6 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 2 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 5 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 2 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: add s1, s1, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add t6, t6, s1 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s0), zero |
| ; RV32ZVBC32-NEXT: csrr t6, vlenb |
| ; RV32ZVBC32-NEXT: slli t6, t6, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t6 |
| ; RV32ZVBC32-NEXT: slli t6, t6, 2 |
| ; RV32ZVBC32-NEXT: add t6, t6, s0 |
| ; RV32ZVBC32-NEXT: add t6, sp, t6 |
| ; RV32ZVBC32-NEXT: addi t6, t6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t4), zero |
| ; RV32ZVBC32-NEXT: csrr t4, vlenb |
| ; RV32ZVBC32-NEXT: slli t4, t4, 6 |
| ; RV32ZVBC32-NEXT: mv t5, t4 |
| ; RV32ZVBC32-NEXT: slli t4, t4, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t5 |
| ; RV32ZVBC32-NEXT: add t4, sp, t4 |
| ; RV32ZVBC32-NEXT: addi t4, t4, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t4) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t2), zero |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 5 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 6 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 3 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t4, t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 6 |
| ; RV32ZVBC32-NEXT: mv t4, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, t4 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t3), zero |
| ; RV32ZVBC32-NEXT: csrr t2, vlenb |
| ; RV32ZVBC32-NEXT: slli t2, t2, 4 |
| ; RV32ZVBC32-NEXT: mv t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t2 |
| ; RV32ZVBC32-NEXT: slli t2, t2, 2 |
| ; RV32ZVBC32-NEXT: add t2, t2, t3 |
| ; RV32ZVBC32-NEXT: add t2, sp, t2 |
| ; RV32ZVBC32-NEXT: addi t2, t2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t0), zero |
| ; RV32ZVBC32-NEXT: csrr t0, vlenb |
| ; RV32ZVBC32-NEXT: slli t0, t0, 3 |
| ; RV32ZVBC32-NEXT: mv t1, t0 |
| ; RV32ZVBC32-NEXT: slli t0, t0, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, t0 |
| ; RV32ZVBC32-NEXT: slli t0, t0, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, t0 |
| ; RV32ZVBC32-NEXT: slli t0, t0, 2 |
| ; RV32ZVBC32-NEXT: add t0, t0, t1 |
| ; RV32ZVBC32-NEXT: add t0, sp, t0 |
| ; RV32ZVBC32-NEXT: addi t0, t0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a7), zero |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 7 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 4 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 4 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 4 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 7 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a6), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (a5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a4), zero |
| ; RV32ZVBC32-NEXT: csrr a4, vlenb |
| ; RV32ZVBC32-NEXT: slli a4, a4, 4 |
| ; RV32ZVBC32-NEXT: mv a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 1 |
| ; RV32ZVBC32-NEXT: add a5, a5, a4 |
| ; RV32ZVBC32-NEXT: slli a4, a4, 2 |
| ; RV32ZVBC32-NEXT: add a4, a4, a5 |
| ; RV32ZVBC32-NEXT: add a4, sp, a4 |
| ; RV32ZVBC32-NEXT: addi a4, a4, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a4) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a4 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 7 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 4 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: add a1, a1, a4 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 4 |
| ; RV32ZVBC32-NEXT: mv a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a4 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 7 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a4 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 4 |
| ; RV32ZVBC32-NEXT: mv a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a4 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s10), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 6 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (a3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a2), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s7), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 6 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 6 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a1 |
| ; RV32ZVBC32-NEXT: slli a1, a1, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a1, sp, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v0, v16, v24 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v16, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v24, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v16, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i64_vv: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e64, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vand.vi v24, v16, 2 |
| ; RV64ZVBC32-NEXT: vand.vi v0, v16, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v0, v24 |
| ; RV64ZVBC32-NEXT: vand.vi v0, v16, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vi v0, v16, 8 |
| ; RV64ZVBC32-NEXT: li a0, 16 |
| ; RV64ZVBC32-NEXT: li a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC32-NEXT: li a0, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: li a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC32-NEXT: li a0, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: li a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC32-NEXT: li a2, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: li a0, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a2 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 11 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2048 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4096 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8192 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16384 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32768 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 65536 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 131072 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 262144 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 31 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 33 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 34 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 35 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 36 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 37 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 38 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 39 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 40 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 41 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 42 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 43 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 44 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 45 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 46 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 47 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 48 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 49 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 50 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 51 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 52 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 53 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 54 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 55 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 56 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 57 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 58 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 59 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 60 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 61 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a1 |
| ; RV64ZVBC32-NEXT: li a1, -1 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 62 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 63 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV64ZVBC32-NEXT: vand.vx v16, v16, a1 |
| ; RV64ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV64ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV64ZVBC32-NEXT: vmul.vv v8, v8, v16 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v24, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 8 x i64> @llvm.clmul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) |
| ret <vscale x 8 x i64> %v |
| } |
| |
| define <vscale x 8 x i64> @clmul_nxv8i64_vx(<vscale x 8 x i64> %va, i64 %b) nounwind { |
| ; RV32V-LABEL: clmul_nxv8i64_vx: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 4 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 4 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: sub sp, sp, a2 |
| ; RV32V-NEXT: sw a0, 8(sp) |
| ; RV32V-NEXT: sw a1, 12(sp) |
| ; RV32V-NEXT: addi a0, sp, 8 |
| ; RV32V-NEXT: lui a1, 524288 |
| ; RV32V-NEXT: li ra, 1 |
| ; RV32V-NEXT: li a6, 2 |
| ; RV32V-NEXT: li s8, 4 |
| ; RV32V-NEXT: li s11, 8 |
| ; RV32V-NEXT: li s10, 128 |
| ; RV32V-NEXT: li s9, 256 |
| ; RV32V-NEXT: li s7, 512 |
| ; RV32V-NEXT: li s6, 1024 |
| ; RV32V-NEXT: lui s5, 1 |
| ; RV32V-NEXT: lui s4, 2 |
| ; RV32V-NEXT: lui s3, 4 |
| ; RV32V-NEXT: lui s2, 8 |
| ; RV32V-NEXT: lui s1, 16 |
| ; RV32V-NEXT: lui s0, 32 |
| ; RV32V-NEXT: lui t6, 64 |
| ; RV32V-NEXT: lui t5, 128 |
| ; RV32V-NEXT: lui t4, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a5, 8192 |
| ; RV32V-NEXT: lui a4, 16384 |
| ; RV32V-NEXT: vsetvli a3, zero, e64, m8, ta, ma |
| ; RV32V-NEXT: vlse64.v v16, (a0), zero |
| ; RV32V-NEXT: lui a2, 32768 |
| ; RV32V-NEXT: sw a1, 16(sp) |
| ; RV32V-NEXT: lui t3, 524288 |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw ra, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a6, 268(sp) |
| ; RV32V-NEXT: lui a6, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw s8, 260(sp) |
| ; RV32V-NEXT: lui a1, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s11, 252(sp) |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: li a3, 16 |
| ; RV32V-NEXT: sw a3, 244(sp) |
| ; RV32V-NEXT: li s8, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: li a3, 32 |
| ; RV32V-NEXT: sw a3, 236(sp) |
| ; RV32V-NEXT: li a3, 32 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: li s11, 64 |
| ; RV32V-NEXT: sw s11, 228(sp) |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s10, 220(sp) |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s9, 212(sp) |
| ; RV32V-NEXT: li s9, 256 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s7, 204(sp) |
| ; RV32V-NEXT: li s7, 512 |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s6, 196(sp) |
| ; RV32V-NEXT: li s6, 1024 |
| ; RV32V-NEXT: slli ra, ra, 11 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw ra, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s5, 180(sp) |
| ; RV32V-NEXT: lui s11, 1 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s4, 172(sp) |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s3, 164(sp) |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s2, 156(sp) |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s1, 148(sp) |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw s0, 140(sp) |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t6, 132(sp) |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t5, 124(sp) |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t4, 116(sp) |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a5, 76(sp) |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a4, 68(sp) |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a2, 60(sp) |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a6, 52(sp) |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a1, 44(sp) |
| ; RV32V-NEXT: lui s10, 131072 |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: sw a0, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw t3, 28(sp) |
| ; RV32V-NEXT: addi a1, sp, 16 |
| ; RV32V-NEXT: vand.vi v24, v16, 2 |
| ; RV32V-NEXT: vand.vi v0, v16, 1 |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v0, v24 |
| ; RV32V-NEXT: vand.vi v0, v16, 4 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vi v0, v16, 8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s8 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a3 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: li a0, 64 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: li a0, 128 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: addi s8, sp, 248 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s9 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s7 |
| ; RV32V-NEXT: addi s9, sp, 232 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s6 |
| ; RV32V-NEXT: addi s7, sp, 224 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, ra |
| ; RV32V-NEXT: addi ra, sp, 216 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s11 |
| ; RV32V-NEXT: addi s6, sp, 208 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s4 |
| ; RV32V-NEXT: addi s5, sp, 200 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s3 |
| ; RV32V-NEXT: addi s4, sp, 192 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s2 |
| ; RV32V-NEXT: addi s2, sp, 184 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s1 |
| ; RV32V-NEXT: addi s3, sp, 176 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s0 |
| ; RV32V-NEXT: addi s1, sp, 168 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t6 |
| ; RV32V-NEXT: addi s0, sp, 160 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t5 |
| ; RV32V-NEXT: addi t5, sp, 152 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t4 |
| ; RV32V-NEXT: addi t6, sp, 144 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t2 |
| ; RV32V-NEXT: addi t4, sp, 136 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t1 |
| ; RV32V-NEXT: addi t3, sp, 128 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, t0 |
| ; RV32V-NEXT: addi t1, sp, 120 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a7 |
| ; RV32V-NEXT: addi t2, sp, 112 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a5 |
| ; RV32V-NEXT: addi t0, sp, 104 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a4 |
| ; RV32V-NEXT: addi a7, sp, 96 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, a2 |
| ; RV32V-NEXT: addi a6, sp, 88 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: lui a0, 65536 |
| ; RV32V-NEXT: vand.vx v0, v16, a0 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: vand.vx v0, v16, s10 |
| ; RV32V-NEXT: vmul.vv v0, v8, v0 |
| ; RV32V-NEXT: vxor.vv v24, v24, v0 |
| ; RV32V-NEXT: sw t1, 4(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a1), zero |
| ; RV32V-NEXT: addi a5, sp, 80 |
| ; RV32V-NEXT: addi a4, sp, 72 |
| ; RV32V-NEXT: addi a2, sp, 64 |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi a0, sp, 56 |
| ; RV32V-NEXT: addi s11, sp, 48 |
| ; RV32V-NEXT: addi a3, sp, 40 |
| ; RV32V-NEXT: addi a1, sp, 32 |
| ; RV32V-NEXT: addi s10, sp, 272 |
| ; RV32V-NEXT: vlse64.v v24, (s10), zero |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s10, s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s10, s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s10 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi s10, sp, 264 |
| ; RV32V-NEXT: vlse64.v v0, (s10), zero |
| ; RV32V-NEXT: addi s10, sp, 256 |
| ; RV32V-NEXT: vlse64.v v24, (s10), zero |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s10, s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s10, s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s10, s10, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s10 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s8), zero |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 5 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add s8, s8, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, s8 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi s8, sp, 24 |
| ; RV32V-NEXT: addi s10, sp, 240 |
| ; RV32V-NEXT: vlse64.v v24, (s10), zero |
| ; RV32V-NEXT: csrr s10, vlenb |
| ; RV32V-NEXT: slli s10, s10, 4 |
| ; RV32V-NEXT: mv t1, s10 |
| ; RV32V-NEXT: slli s10, s10, 2 |
| ; RV32V-NEXT: add t1, t1, s10 |
| ; RV32V-NEXT: slli s10, s10, 1 |
| ; RV32V-NEXT: add s10, s10, t1 |
| ; RV32V-NEXT: lw t1, 4(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: add s10, sp, s10 |
| ; RV32V-NEXT: addi s10, s10, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s10) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (s9), zero |
| ; RV32V-NEXT: vlse64.v v24, (s7), zero |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (ra), zero |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 4 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s7) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 4 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s7) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s7) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s7, vlenb |
| ; RV32V-NEXT: slli s7, s7, 3 |
| ; RV32V-NEXT: mv s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 2 |
| ; RV32V-NEXT: add s9, s9, s7 |
| ; RV32V-NEXT: slli s7, s7, 1 |
| ; RV32V-NEXT: add s7, s7, s9 |
| ; RV32V-NEXT: add s7, sp, s7 |
| ; RV32V-NEXT: addi s7, s7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (s6), zero |
| ; RV32V-NEXT: vlse64.v v24, (s5), zero |
| ; RV32V-NEXT: csrr s5, vlenb |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: mv s6, s5 |
| ; RV32V-NEXT: slli s5, s5, 3 |
| ; RV32V-NEXT: add s6, s6, s5 |
| ; RV32V-NEXT: slli s5, s5, 1 |
| ; RV32V-NEXT: add s5, s5, s6 |
| ; RV32V-NEXT: add s5, sp, s5 |
| ; RV32V-NEXT: addi s5, s5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s4), zero |
| ; RV32V-NEXT: csrr s4, vlenb |
| ; RV32V-NEXT: slli s4, s4, 4 |
| ; RV32V-NEXT: mv s5, s4 |
| ; RV32V-NEXT: slli s4, s4, 2 |
| ; RV32V-NEXT: add s5, s5, s4 |
| ; RV32V-NEXT: slli s4, s4, 1 |
| ; RV32V-NEXT: add s4, s4, s5 |
| ; RV32V-NEXT: add s4, sp, s4 |
| ; RV32V-NEXT: addi s4, s4, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s4) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s2), zero |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 6 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v0, v16, v0 |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 4 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vs8r.v v0, (s2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 3 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 3 |
| ; RV32V-NEXT: add s4, s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 3 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 2 |
| ; RV32V-NEXT: add s4, s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 4 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 2 |
| ; RV32V-NEXT: add s4, s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 5 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 2 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 6 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vl8r.v v24, (s2) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 4 |
| ; RV32V-NEXT: mv s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 2 |
| ; RV32V-NEXT: add s4, s4, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s4 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s3), zero |
| ; RV32V-NEXT: csrr s2, vlenb |
| ; RV32V-NEXT: slli s2, s2, 6 |
| ; RV32V-NEXT: mv s3, s2 |
| ; RV32V-NEXT: slli s2, s2, 1 |
| ; RV32V-NEXT: add s2, s2, s3 |
| ; RV32V-NEXT: add s2, sp, s2 |
| ; RV32V-NEXT: addi s2, s2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (s1), zero |
| ; RV32V-NEXT: vlse64.v v24, (s0), zero |
| ; RV32V-NEXT: csrr s0, vlenb |
| ; RV32V-NEXT: slli s0, s0, 3 |
| ; RV32V-NEXT: mv s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 3 |
| ; RV32V-NEXT: add s1, s1, s0 |
| ; RV32V-NEXT: slli s0, s0, 1 |
| ; RV32V-NEXT: add s0, s0, s1 |
| ; RV32V-NEXT: add s0, sp, s0 |
| ; RV32V-NEXT: addi s0, s0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (s0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t5), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 6 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t5) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 5 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t5) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t5) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: add s0, s0, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t5, t5, s0 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t6), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 3 |
| ; RV32V-NEXT: mv t6, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t6, t6, t5 |
| ; RV32V-NEXT: slli t5, t5, 1 |
| ; RV32V-NEXT: add t6, t6, t5 |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, t5, t6 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (t4), zero |
| ; RV32V-NEXT: vlse64.v v24, (t3), zero |
| ; RV32V-NEXT: csrr t3, vlenb |
| ; RV32V-NEXT: slli t3, t3, 6 |
| ; RV32V-NEXT: mv t4, t3 |
| ; RV32V-NEXT: slli t3, t3, 1 |
| ; RV32V-NEXT: add t3, t3, t4 |
| ; RV32V-NEXT: add t3, sp, t3 |
| ; RV32V-NEXT: addi t3, t3, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t3) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t1), zero |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t3, t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t3, t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t3, t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 5 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t3, t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 6 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 3 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t3, t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 6 |
| ; RV32V-NEXT: mv t3, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t1, t1, t3 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (t2), zero |
| ; RV32V-NEXT: csrr t1, vlenb |
| ; RV32V-NEXT: slli t1, t1, 4 |
| ; RV32V-NEXT: mv t2, t1 |
| ; RV32V-NEXT: slli t1, t1, 1 |
| ; RV32V-NEXT: add t2, t2, t1 |
| ; RV32V-NEXT: slli t1, t1, 2 |
| ; RV32V-NEXT: add t1, t1, t2 |
| ; RV32V-NEXT: add t1, sp, t1 |
| ; RV32V-NEXT: addi t1, t1, 288 |
| ; RV32V-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (t0), zero |
| ; RV32V-NEXT: vlse64.v v24, (a7), zero |
| ; RV32V-NEXT: csrr a7, vlenb |
| ; RV32V-NEXT: slli a7, a7, 3 |
| ; RV32V-NEXT: mv t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 1 |
| ; RV32V-NEXT: add t0, t0, a7 |
| ; RV32V-NEXT: slli a7, a7, 2 |
| ; RV32V-NEXT: add a7, a7, t0 |
| ; RV32V-NEXT: add a7, sp, a7 |
| ; RV32V-NEXT: addi a7, a7, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a6), zero |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 4 |
| ; RV32V-NEXT: mv a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a7, a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: add a6, a6, a7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: mv a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a6, a6, a7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 4 |
| ; RV32V-NEXT: mv a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: add a6, a6, a7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: mv a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a7, a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a7, a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: add a6, a6, a7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: mv a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 4 |
| ; RV32V-NEXT: add a6, a6, a7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a6, vlenb |
| ; RV32V-NEXT: slli a6, a6, 3 |
| ; RV32V-NEXT: mv a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a7, a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 1 |
| ; RV32V-NEXT: add a7, a7, a6 |
| ; RV32V-NEXT: slli a6, a6, 2 |
| ; RV32V-NEXT: add a6, a6, a7 |
| ; RV32V-NEXT: add a6, sp, a6 |
| ; RV32V-NEXT: addi a6, a6, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a5), zero |
| ; RV32V-NEXT: csrr a5, vlenb |
| ; RV32V-NEXT: slli a5, a5, 7 |
| ; RV32V-NEXT: add a5, sp, a5 |
| ; RV32V-NEXT: addi a5, a5, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (a4), zero |
| ; RV32V-NEXT: vlse64.v v24, (a2), zero |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 4 |
| ; RV32V-NEXT: mv a4, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a4, a4, a2 |
| ; RV32V-NEXT: slli a2, a2, 2 |
| ; RV32V-NEXT: add a2, a2, a4 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (a0), zero |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a2, a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a2, a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a2, a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a2, a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a2, a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a2, a2, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s11), zero |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v0, (a3), zero |
| ; RV32V-NEXT: vlse64.v v24, (a1), zero |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v24, (s8), zero |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v24, v16, v0 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v24, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v0, v16, v24 |
| ; RV32V-NEXT: lui a0, 262144 |
| ; RV32V-NEXT: vand.vx v16, v16, a0 |
| ; RV32V-NEXT: vmul.vv v16, v8, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vxor.vv v16, v24, v16 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 8 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 5 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 7 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 6 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 3 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a1, a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v24, v8, v24 |
| ; RV32V-NEXT: vxor.vv v16, v16, v24 |
| ; RV32V-NEXT: vmul.vv v8, v8, v0 |
| ; RV32V-NEXT: vxor.vv v8, v16, v8 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 4 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: ret |
| ; |
| ; Expected riscv64 +v output without Zvbc (the RV64V prefix in the RUN lines). |
| ; The scalar operand in a0 is split into its 64 single-bit pieces. Each piece |
| ; is multiplied into v8 with vmul.vx and the products are XOR-accumulated in |
| ; v16, with the final XOR writing the result to v8. |
| ; NOTE(review) the define line is outside this view, so the IR operation being |
| ; lowered is presumed from the function name - confirm against the IR body. |
| ; RV64V-LABEL: clmul_nxv8i64_vx: |
| ; RV64V: # %bb.0: |
| ; Bits 0-10 are masked directly with andi immediates. |
| ; RV64V-NEXT: andi a1, a0, 2 |
| ; RV64V-NEXT: andi a2, a0, 1 |
| ; RV64V-NEXT: vsetvli a3, zero, e64, m8, ta, ma |
| ; RV64V-NEXT: vmul.vx v16, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 4 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 8 |
| ; RV64V-NEXT: vxor.vv v16, v24, v16 |
| ; RV64V-NEXT: vmul.vx v24, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 16 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 32 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 64 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 128 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 256 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 512 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a1 |
| ; RV64V-NEXT: andi a3, a0, 1024 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; a1 = 1 stays live from here as the seed for the slli-built masks |
| ; (bit 11 below, then bits 32-62). |
| ; RV64V-NEXT: li a1, 1 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a3 |
| ; RV64V-NEXT: slli a2, a1, 11 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; lui a2, N materializes N << 12, so the following run covers bits 12-30. |
| ; RV64V-NEXT: lui a2, 1 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 2 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 4 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 8 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 16 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 64 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 128 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 256 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 512 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 1024 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 2048 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 4096 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 8192 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 16384 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 32768 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 65536 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 131072 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: lui a2, 262144 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; Bit 31 is isolated with a shift pair (srliw then slli) rather than a mask. |
| ; RV64V-NEXT: srliw a2, a0, 31 |
| ; RV64V-NEXT: slli a2, a2, 31 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; Bits 32-62 use single-bit masks formed by shifting the 1 held in a1. |
| ; RV64V-NEXT: slli a2, a1, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 33 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 34 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 35 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 36 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 37 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 38 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 39 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 40 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 41 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 42 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 43 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 44 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 45 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 46 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 47 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 48 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 49 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 50 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 51 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 52 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 53 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 54 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 55 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 56 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 57 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 58 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 59 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 60 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; The last three scalar pieces (bits 61, 62 and 63) are all computed up |
| ; front here. Bit 63 is isolated in place in a0 with an srli/slli pair. |
| ; RV64V-NEXT: slli a2, a1, 61 |
| ; RV64V-NEXT: slli a1, a1, 62 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: and a1, a0, a1 |
| ; RV64V-NEXT: srli a0, a0, 63 |
| ; RV64V-NEXT: slli a0, a0, 63 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a2 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; RV64V-NEXT: vmul.vx v24, v8, a1 |
| ; RV64V-NEXT: vxor.vv v16, v16, v24 |
| ; The final product is written straight into v8 and folded into the result. |
| ; RV64V-NEXT: vmul.vx v8, v8, a0 |
| ; RV64V-NEXT: vxor.vv v8, v16, v8 |
| ; RV64V-NEXT: ret |
| ; |
| ; Expected riscv32 output with +zvbc (the RV32ZVBC64 prefix in the RUN lines). |
| ; a0 and a1 are stored to the lower and upper word of an 8-byte stack slot at |
| ; 8(sp). That slot is broadcast into v16 by a zero-stride vlse64.v, and one |
| ; vclmul.vv of v8 by v16 produces the result in v8. |
| ; NOTE(review) a0/a1 are presumably the two 32-bit halves of the 64-bit scalar |
| ; argument - the define line is outside this view, confirm there. |
| ; RV32ZVBC64-LABEL: clmul_nxv8i64_vx: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: addi sp, sp, -16 |
| ; RV32ZVBC64-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC64-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC64-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e64, m8, ta, ma |
| ; Stride register is zero, so every element is loaded from the same address. |
| ; RV32ZVBC64-NEXT: vlse64.v v16, (a0), zero |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v16 |
| ; RV32ZVBC64-NEXT: addi sp, sp, 16 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; Expected riscv64 output with +zvbc (the RV64ZVBC64 prefix in the RUN lines). |
| ; The whole operation is a single vclmul.vx of v8 by the scalar in a0 at |
| ; e64/m8, with no stack traffic and no per-bit expansion. |
| ; RV64ZVBC64-LABEL: clmul_nxv8i64_vx: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e64, m8, ta, ma |
| ; RV64ZVBC64-NEXT: vclmul.vx v8, v8, a0 |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv8i64_vx: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 4 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 4 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a2 |
| ; RV32ZVBC32-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC32-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC32-NEXT: lui a1, 524288 |
| ; RV32ZVBC32-NEXT: li ra, 1 |
| ; RV32ZVBC32-NEXT: li a6, 2 |
| ; RV32ZVBC32-NEXT: li s8, 4 |
| ; RV32ZVBC32-NEXT: li s11, 8 |
| ; RV32ZVBC32-NEXT: li s10, 128 |
| ; RV32ZVBC32-NEXT: li s9, 256 |
| ; RV32ZVBC32-NEXT: li s7, 512 |
| ; RV32ZVBC32-NEXT: li s6, 1024 |
| ; RV32ZVBC32-NEXT: lui s5, 1 |
| ; RV32ZVBC32-NEXT: lui s4, 2 |
| ; RV32ZVBC32-NEXT: lui s3, 4 |
| ; RV32ZVBC32-NEXT: lui s2, 8 |
| ; RV32ZVBC32-NEXT: lui s1, 16 |
| ; RV32ZVBC32-NEXT: lui s0, 32 |
| ; RV32ZVBC32-NEXT: lui t6, 64 |
| ; RV32ZVBC32-NEXT: lui t5, 128 |
| ; RV32ZVBC32-NEXT: lui t4, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a5, 8192 |
| ; RV32ZVBC32-NEXT: lui a4, 16384 |
| ; RV32ZVBC32-NEXT: vsetvli a3, zero, e64, m8, ta, ma |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a0), zero |
| ; RV32ZVBC32-NEXT: lui a2, 32768 |
| ; RV32ZVBC32-NEXT: sw a1, 16(sp) |
| ; RV32ZVBC32-NEXT: lui t3, 524288 |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a6, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 252(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: li a3, 16 |
| ; RV32ZVBC32-NEXT: sw a3, 244(sp) |
| ; RV32ZVBC32-NEXT: li s8, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: li a3, 32 |
| ; RV32ZVBC32-NEXT: sw a3, 236(sp) |
| ; RV32ZVBC32-NEXT: li a3, 32 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: li s11, 64 |
| ; RV32ZVBC32-NEXT: sw s11, 228(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 220(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 212(sp) |
| ; RV32ZVBC32-NEXT: li s9, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 204(sp) |
| ; RV32ZVBC32-NEXT: li s7, 512 |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 196(sp) |
| ; RV32ZVBC32-NEXT: li s6, 1024 |
| ; RV32ZVBC32-NEXT: slli ra, ra, 11 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 180(sp) |
| ; RV32ZVBC32-NEXT: lui s11, 1 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 172(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 164(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 156(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 148(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 140(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 132(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 124(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 116(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 76(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 68(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 60(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 52(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 44(sp) |
| ; RV32ZVBC32-NEXT: lui s10, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a1, sp, 16 |
| ; RV32ZVBC32-NEXT: vand.vi v24, v16, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v16, 1 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v0, v24 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v16, 4 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vi v0, v16, 8 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s8 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a3 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: li a0, 64 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: li a0, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 248 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s9 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s7 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 232 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s6 |
| ; RV32ZVBC32-NEXT: addi s7, sp, 224 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, ra |
| ; RV32ZVBC32-NEXT: addi ra, sp, 216 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s11 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 208 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s4 |
| ; RV32ZVBC32-NEXT: addi s5, sp, 200 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s3 |
| ; RV32ZVBC32-NEXT: addi s4, sp, 192 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s2 |
| ; RV32ZVBC32-NEXT: addi s2, sp, 184 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s1 |
| ; RV32ZVBC32-NEXT: addi s3, sp, 176 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s0 |
| ; RV32ZVBC32-NEXT: addi s1, sp, 168 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t6 |
| ; RV32ZVBC32-NEXT: addi s0, sp, 160 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t5 |
| ; RV32ZVBC32-NEXT: addi t5, sp, 152 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t4 |
| ; RV32ZVBC32-NEXT: addi t6, sp, 144 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t2 |
| ; RV32ZVBC32-NEXT: addi t4, sp, 136 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t1 |
| ; RV32ZVBC32-NEXT: addi t3, sp, 128 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, t0 |
| ; RV32ZVBC32-NEXT: addi t1, sp, 120 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a7 |
| ; RV32ZVBC32-NEXT: addi t2, sp, 112 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a5 |
| ; RV32ZVBC32-NEXT: addi t0, sp, 104 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a4 |
| ; RV32ZVBC32-NEXT: addi a7, sp, 96 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a2 |
| ; RV32ZVBC32-NEXT: addi a6, sp, 88 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: lui a0, 65536 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: vand.vx v0, v16, s10 |
| ; RV32ZVBC32-NEXT: vmul.vv v0, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v24, v24, v0 |
| ; RV32ZVBC32-NEXT: sw t1, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a5, sp, 80 |
| ; RV32ZVBC32-NEXT: addi a4, sp, 72 |
| ; RV32ZVBC32-NEXT: addi a2, sp, 64 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a0, sp, 56 |
| ; RV32ZVBC32-NEXT: addi s11, sp, 48 |
| ; RV32ZVBC32-NEXT: addi a3, sp, 40 |
| ; RV32ZVBC32-NEXT: addi a1, sp, 32 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 272 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s10), zero |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s10, s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s10, s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s10 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s10, sp, 264 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s10), zero |
| ; RV32ZVBC32-NEXT: addi s10, sp, 256 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s10), zero |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s10, s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s10, s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s10, s10, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s10 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s8), zero |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 5 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add s8, s8, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, s8 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s8, sp, 24 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s10), zero |
| ; RV32ZVBC32-NEXT: csrr s10, vlenb |
| ; RV32ZVBC32-NEXT: slli s10, s10, 4 |
| ; RV32ZVBC32-NEXT: mv t1, s10 |
| ; RV32ZVBC32-NEXT: slli s10, s10, 2 |
| ; RV32ZVBC32-NEXT: add t1, t1, s10 |
| ; RV32ZVBC32-NEXT: slli s10, s10, 1 |
| ; RV32ZVBC32-NEXT: add s10, s10, t1 |
| ; RV32ZVBC32-NEXT: lw t1, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: add s10, sp, s10 |
| ; RV32ZVBC32-NEXT: addi s10, s10, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s10) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s9), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s7), zero |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (ra), zero |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 4 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s7) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 4 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s7) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s7) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s7, vlenb |
| ; RV32ZVBC32-NEXT: slli s7, s7, 3 |
| ; RV32ZVBC32-NEXT: mv s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 2 |
| ; RV32ZVBC32-NEXT: add s9, s9, s7 |
| ; RV32ZVBC32-NEXT: slli s7, s7, 1 |
| ; RV32ZVBC32-NEXT: add s7, s7, s9 |
| ; RV32ZVBC32-NEXT: add s7, sp, s7 |
| ; RV32ZVBC32-NEXT: addi s7, s7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s5), zero |
| ; RV32ZVBC32-NEXT: csrr s5, vlenb |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: mv s6, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 3 |
| ; RV32ZVBC32-NEXT: add s6, s6, s5 |
| ; RV32ZVBC32-NEXT: slli s5, s5, 1 |
| ; RV32ZVBC32-NEXT: add s5, s5, s6 |
| ; RV32ZVBC32-NEXT: add s5, sp, s5 |
| ; RV32ZVBC32-NEXT: addi s5, s5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s4), zero |
| ; RV32ZVBC32-NEXT: csrr s4, vlenb |
| ; RV32ZVBC32-NEXT: slli s4, s4, 4 |
| ; RV32ZVBC32-NEXT: mv s5, s4 |
| ; RV32ZVBC32-NEXT: slli s4, s4, 2 |
| ; RV32ZVBC32-NEXT: add s5, s5, s4 |
| ; RV32ZVBC32-NEXT: slli s4, s4, 1 |
| ; RV32ZVBC32-NEXT: add s4, s4, s5 |
| ; RV32ZVBC32-NEXT: add s4, sp, s4 |
| ; RV32ZVBC32-NEXT: addi s4, s4, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s4) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s2), zero |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 6 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v0, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 4 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v0, (s2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 3 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 3 |
| ; RV32ZVBC32-NEXT: add s4, s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 3 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 2 |
| ; RV32ZVBC32-NEXT: add s4, s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 4 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 2 |
| ; RV32ZVBC32-NEXT: add s4, s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 5 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 2 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 6 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (s2) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 4 |
| ; RV32ZVBC32-NEXT: mv s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 2 |
| ; RV32ZVBC32-NEXT: add s4, s4, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s4 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s3), zero |
| ; RV32ZVBC32-NEXT: csrr s2, vlenb |
| ; RV32ZVBC32-NEXT: slli s2, s2, 6 |
| ; RV32ZVBC32-NEXT: mv s3, s2 |
| ; RV32ZVBC32-NEXT: slli s2, s2, 1 |
| ; RV32ZVBC32-NEXT: add s2, s2, s3 |
| ; RV32ZVBC32-NEXT: add s2, sp, s2 |
| ; RV32ZVBC32-NEXT: addi s2, s2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s1), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s0), zero |
| ; RV32ZVBC32-NEXT: csrr s0, vlenb |
| ; RV32ZVBC32-NEXT: slli s0, s0, 3 |
| ; RV32ZVBC32-NEXT: mv s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 3 |
| ; RV32ZVBC32-NEXT: add s1, s1, s0 |
| ; RV32ZVBC32-NEXT: slli s0, s0, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, s1 |
| ; RV32ZVBC32-NEXT: add s0, sp, s0 |
| ; RV32ZVBC32-NEXT: addi s0, s0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (s0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t5), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 6 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t5) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 5 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t5) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t5) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: add s0, s0, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t5, t5, s0 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t6), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 3 |
| ; RV32ZVBC32-NEXT: mv t6, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t6, t6, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 1 |
| ; RV32ZVBC32-NEXT: add t6, t6, t5 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, t5, t6 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t3), zero |
| ; RV32ZVBC32-NEXT: csrr t3, vlenb |
| ; RV32ZVBC32-NEXT: slli t3, t3, 6 |
| ; RV32ZVBC32-NEXT: mv t4, t3 |
| ; RV32ZVBC32-NEXT: slli t3, t3, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t4 |
| ; RV32ZVBC32-NEXT: add t3, sp, t3 |
| ; RV32ZVBC32-NEXT: addi t3, t3, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t3) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t1), zero |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 5 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 6 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 3 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t3, t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (t1) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 6 |
| ; RV32ZVBC32-NEXT: mv t3, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t1, t1, t3 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (t2), zero |
| ; RV32ZVBC32-NEXT: csrr t1, vlenb |
| ; RV32ZVBC32-NEXT: slli t1, t1, 4 |
| ; RV32ZVBC32-NEXT: mv t2, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 1 |
| ; RV32ZVBC32-NEXT: add t2, t2, t1 |
| ; RV32ZVBC32-NEXT: slli t1, t1, 2 |
| ; RV32ZVBC32-NEXT: add t1, t1, t2 |
| ; RV32ZVBC32-NEXT: add t1, sp, t1 |
| ; RV32ZVBC32-NEXT: addi t1, t1, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (t1) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (t0), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a7), zero |
| ; RV32ZVBC32-NEXT: csrr a7, vlenb |
| ; RV32ZVBC32-NEXT: slli a7, a7, 3 |
| ; RV32ZVBC32-NEXT: mv t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 1 |
| ; RV32ZVBC32-NEXT: add t0, t0, a7 |
| ; RV32ZVBC32-NEXT: slli a7, a7, 2 |
| ; RV32ZVBC32-NEXT: add a7, a7, t0 |
| ; RV32ZVBC32-NEXT: add a7, sp, a7 |
| ; RV32ZVBC32-NEXT: addi a7, a7, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a6), zero |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 4 |
| ; RV32ZVBC32-NEXT: mv a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: add a6, a6, a7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: mv a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a6, a6, a7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 4 |
| ; RV32ZVBC32-NEXT: mv a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: add a6, a6, a7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: mv a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: add a6, a6, a7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: mv a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 4 |
| ; RV32ZVBC32-NEXT: add a6, a6, a7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a6, vlenb |
| ; RV32ZVBC32-NEXT: slli a6, a6, 3 |
| ; RV32ZVBC32-NEXT: mv a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 1 |
| ; RV32ZVBC32-NEXT: add a7, a7, a6 |
| ; RV32ZVBC32-NEXT: slli a6, a6, 2 |
| ; RV32ZVBC32-NEXT: add a6, a6, a7 |
| ; RV32ZVBC32-NEXT: add a6, sp, a6 |
| ; RV32ZVBC32-NEXT: addi a6, a6, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a5), zero |
| ; RV32ZVBC32-NEXT: csrr a5, vlenb |
| ; RV32ZVBC32-NEXT: slli a5, a5, 7 |
| ; RV32ZVBC32-NEXT: add a5, sp, a5 |
| ; RV32ZVBC32-NEXT: addi a5, a5, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a5) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (a4), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a2), zero |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 4 |
| ; RV32ZVBC32-NEXT: mv a4, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a4, a4, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 2 |
| ; RV32ZVBC32-NEXT: add a2, a2, a4 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a0), zero |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s11), zero |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (a3), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s8), zero |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v0 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v24, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v0, v16, v24 |
| ; RV32ZVBC32-NEXT: lui a0, 262144 |
| ; RV32ZVBC32-NEXT: vand.vx v16, v16, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v24, v16 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 8 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 5 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 7 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 6 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 3 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a1, a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v8, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v16, v8 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 4 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv8i64_vx: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: andi a1, a0, 2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 1 |
| ; RV64ZVBC32-NEXT: vsetvli a3, zero, e64, m8, ta, ma |
| ; RV64ZVBC32-NEXT: vmul.vx v16, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 4 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 8 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v24, v16 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 16 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 32 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 64 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 128 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 256 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 512 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a3, a0, 1024 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: li a1, 1 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a3 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 11 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 64 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 128 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 256 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 512 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1024 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2048 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4096 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8192 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16384 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32768 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 65536 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 131072 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 262144 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: srliw a2, a0, 31 |
| ; RV64ZVBC32-NEXT: slli a2, a2, 31 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 33 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 34 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 35 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 36 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 37 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 38 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 39 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 40 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 41 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 42 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 43 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 44 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 45 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 46 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 47 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 48 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 49 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 50 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 51 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 52 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 53 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 54 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 55 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 56 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 57 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 58 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 59 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 60 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 61 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 62 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: and a1, a0, a1 |
| ; RV64ZVBC32-NEXT: srli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v24, v8, a1 |
| ; RV64ZVBC32-NEXT: vxor.vv v16, v16, v24 |
| ; RV64ZVBC32-NEXT: vmul.vx v8, v8, a0 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v16, v8 |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 |
| %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer |
| %v = call <vscale x 8 x i64> @llvm.clmul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) |
| ret <vscale x 8 x i64> %v |
| } |
| |
| define <vscale x 1 x i64> @clmul_nxv1i64_vv_mask(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %mask) { |
| ; RV32V-LABEL: clmul_nxv1i64_vv_mask: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: .cfi_def_cfa_offset 352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: .cfi_offset ra, -4 |
| ; RV32V-NEXT: .cfi_offset s0, -8 |
| ; RV32V-NEXT: .cfi_offset s1, -12 |
| ; RV32V-NEXT: .cfi_offset s2, -16 |
| ; RV32V-NEXT: .cfi_offset s3, -20 |
| ; RV32V-NEXT: .cfi_offset s4, -24 |
| ; RV32V-NEXT: .cfi_offset s5, -28 |
| ; RV32V-NEXT: .cfi_offset s6, -32 |
| ; RV32V-NEXT: .cfi_offset s7, -36 |
| ; RV32V-NEXT: .cfi_offset s8, -40 |
| ; RV32V-NEXT: .cfi_offset s9, -44 |
| ; RV32V-NEXT: .cfi_offset s10, -48 |
| ; RV32V-NEXT: .cfi_offset s11, -52 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 3 |
| ; RV32V-NEXT: sub a0, a1, a0 |
| ; RV32V-NEXT: sub sp, sp, a0 |
| ; RV32V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xe0, 0x02, 0x22, 0x11, 0x07, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 352 + 7 * vlenb |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: lui a1, 524288 |
| ; RV32V-NEXT: li t5, 1 |
| ; RV32V-NEXT: li a4, 2 |
| ; RV32V-NEXT: li a2, 4 |
| ; RV32V-NEXT: li ra, 8 |
| ; RV32V-NEXT: li a0, 16 |
| ; RV32V-NEXT: li s11, 32 |
| ; RV32V-NEXT: li s8, 64 |
| ; RV32V-NEXT: li s9, 128 |
| ; RV32V-NEXT: li s10, 256 |
| ; RV32V-NEXT: li s6, 512 |
| ; RV32V-NEXT: li s1, 1024 |
| ; RV32V-NEXT: lui s7, 1 |
| ; RV32V-NEXT: lui s5, 2 |
| ; RV32V-NEXT: lui s4, 4 |
| ; RV32V-NEXT: lui s3, 8 |
| ; RV32V-NEXT: lui s2, 16 |
| ; RV32V-NEXT: lui s0, 32 |
| ; RV32V-NEXT: lui t6, 64 |
| ; RV32V-NEXT: lui t4, 128 |
| ; RV32V-NEXT: lui t3, 256 |
| ; RV32V-NEXT: lui t2, 512 |
| ; RV32V-NEXT: lui t1, 1024 |
| ; RV32V-NEXT: lui t0, 2048 |
| ; RV32V-NEXT: lui a7, 4096 |
| ; RV32V-NEXT: lui a6, 8192 |
| ; RV32V-NEXT: lui a5, 16384 |
| ; RV32V-NEXT: lui a3, 32768 |
| ; RV32V-NEXT: sw a1, 16(sp) |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw t5, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw a4, 268(sp) |
| ; RV32V-NEXT: lui a4, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw a2, 260(sp) |
| ; RV32V-NEXT: lui a2, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw ra, 252(sp) |
| ; RV32V-NEXT: vsetvli ra, zero, e64, m1, ta, mu |
| ; RV32V-NEXT: vand.vi v13, v9, 2 |
| ; RV32V-NEXT: vand.vi v14, v9, 1 |
| ; RV32V-NEXT: vand.vi v12, v9, 4 |
| ; RV32V-NEXT: vand.vi v11, v9, 8 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: sw a0, 244(sp) |
| ; RV32V-NEXT: vand.vx v0, v9, a0 |
| ; RV32V-NEXT: addi ra, sp, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: sw s11, 236(sp) |
| ; RV32V-NEXT: vand.vx v15, v9, s11 |
| ; RV32V-NEXT: addi s11, sp, 272 |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw s8, 228(sp) |
| ; RV32V-NEXT: vand.vx v16, v9, s8 |
| ; RV32V-NEXT: addi s8, sp, 264 |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s9, 220(sp) |
| ; RV32V-NEXT: vand.vx v17, v9, s9 |
| ; RV32V-NEXT: addi s9, sp, 256 |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s10, 212(sp) |
| ; RV32V-NEXT: vand.vx v18, v9, s10 |
| ; RV32V-NEXT: addi s10, sp, 248 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s6, 204(sp) |
| ; RV32V-NEXT: vand.vx v19, v9, s6 |
| ; RV32V-NEXT: addi s6, sp, 240 |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s1, 196(sp) |
| ; RV32V-NEXT: vand.vx v20, v9, s1 |
| ; RV32V-NEXT: slli t5, t5, 11 |
| ; RV32V-NEXT: vand.vx v21, v9, s7 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw t5, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s7, 180(sp) |
| ; RV32V-NEXT: vand.vx v22, v9, s5 |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s5, 172(sp) |
| ; RV32V-NEXT: addi s7, sp, 216 |
| ; RV32V-NEXT: vand.vx v23, v9, s4 |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw s4, 164(sp) |
| ; RV32V-NEXT: addi s5, sp, 208 |
| ; RV32V-NEXT: vand.vx v24, v9, s3 |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw s3, 156(sp) |
| ; RV32V-NEXT: addi s4, sp, 200 |
| ; RV32V-NEXT: vand.vx v25, v9, s2 |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw s2, 148(sp) |
| ; RV32V-NEXT: addi s3, sp, 192 |
| ; RV32V-NEXT: vand.vx v26, v9, s0 |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw s0, 140(sp) |
| ; RV32V-NEXT: addi s2, sp, 184 |
| ; RV32V-NEXT: vand.vx v27, v9, t6 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t6, 132(sp) |
| ; RV32V-NEXT: addi s1, sp, 176 |
| ; RV32V-NEXT: vand.vx v28, v9, t4 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t4, 124(sp) |
| ; RV32V-NEXT: addi s0, sp, 168 |
| ; RV32V-NEXT: vand.vx v29, v9, t3 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t3, 116(sp) |
| ; RV32V-NEXT: addi t6, sp, 160 |
| ; RV32V-NEXT: vand.vx v30, v9, t2 |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw t2, 108(sp) |
| ; RV32V-NEXT: addi t4, sp, 152 |
| ; RV32V-NEXT: vand.vx v31, v9, t1 |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw t1, 100(sp) |
| ; RV32V-NEXT: addi t3, sp, 144 |
| ; RV32V-NEXT: vand.vx v7, v9, t0 |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw t0, 92(sp) |
| ; RV32V-NEXT: addi t2, sp, 136 |
| ; RV32V-NEXT: vand.vx v6, v9, a7 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a7, 84(sp) |
| ; RV32V-NEXT: addi t1, sp, 128 |
| ; RV32V-NEXT: vand.vx v5, v9, a6 |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a6, 76(sp) |
| ; RV32V-NEXT: addi t0, sp, 120 |
| ; RV32V-NEXT: vand.vx v4, v9, a5 |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a5, 68(sp) |
| ; RV32V-NEXT: addi a7, sp, 112 |
| ; RV32V-NEXT: vand.vx v3, v9, a3 |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw a3, 60(sp) |
| ; RV32V-NEXT: addi a6, sp, 104 |
| ; RV32V-NEXT: vand.vx v2, v9, a4 |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw a4, 52(sp) |
| ; RV32V-NEXT: addi a5, sp, 96 |
| ; RV32V-NEXT: vand.vx v1, v9, a2 |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw a2, 44(sp) |
| ; RV32V-NEXT: addi a4, sp, 88 |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: lui a1, 262144 |
| ; RV32V-NEXT: sw a1, 36(sp) |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: lui a0, 524288 |
| ; RV32V-NEXT: sw a0, 28(sp) |
| ; RV32V-NEXT: addi a3, sp, 80 |
| ; RV32V-NEXT: vmul.vv v10, v8, v13 |
| ; RV32V-NEXT: vmul.vv v13, v8, v14 |
| ; RV32V-NEXT: vxor.vv v13, v13, v10 |
| ; RV32V-NEXT: vand.vx v10, v9, t5 |
| ; RV32V-NEXT: addi a2, sp, 72 |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v13, v13, v12 |
| ; RV32V-NEXT: vlse64.v v12, (ra), zero |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli t5, a0, 2 |
| ; RV32V-NEXT: add a0, t5, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi a0, sp, 64 |
| ; RV32V-NEXT: vmul.vv v11, v8, v11 |
| ; RV32V-NEXT: vxor.vv v13, v13, v11 |
| ; RV32V-NEXT: vlse64.v v11, (s11), zero |
| ; RV32V-NEXT: csrr t5, vlenb |
| ; RV32V-NEXT: slli t5, t5, 2 |
| ; RV32V-NEXT: add t5, sp, t5 |
| ; RV32V-NEXT: addi t5, t5, 288 |
| ; RV32V-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi s11, sp, 56 |
| ; RV32V-NEXT: vmul.vv v14, v8, v0 |
| ; RV32V-NEXT: vxor.vv v13, v13, v14 |
| ; RV32V-NEXT: vlse64.v v0, (s8), zero |
| ; RV32V-NEXT: addi ra, sp, 48 |
| ; RV32V-NEXT: vmul.vv v14, v8, v15 |
| ; RV32V-NEXT: vxor.vv v14, v13, v14 |
| ; RV32V-NEXT: vlse64.v v12, (s9), zero |
| ; RV32V-NEXT: addi t5, sp, 40 |
| ; RV32V-NEXT: vmul.vv v15, v8, v16 |
| ; RV32V-NEXT: vxor.vv v15, v14, v15 |
| ; RV32V-NEXT: vlse64.v v11, (s10), zero |
| ; RV32V-NEXT: csrr s8, vlenb |
| ; RV32V-NEXT: slli s9, s8, 1 |
| ; RV32V-NEXT: add s8, s9, s8 |
| ; RV32V-NEXT: add s8, sp, s8 |
| ; RV32V-NEXT: addi s8, s8, 288 |
| ; RV32V-NEXT: vs1r.v v11, (s8) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi s8, sp, 32 |
| ; RV32V-NEXT: vmul.vv v16, v8, v17 |
| ; RV32V-NEXT: vxor.vv v16, v15, v16 |
| ; RV32V-NEXT: vlse64.v v11, (s6), zero |
| ; RV32V-NEXT: csrr s6, vlenb |
| ; RV32V-NEXT: slli s6, s6, 1 |
| ; RV32V-NEXT: add s6, sp, s6 |
| ; RV32V-NEXT: addi s6, s6, 288 |
| ; RV32V-NEXT: vs1r.v v11, (s6) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi s6, sp, 24 |
| ; RV32V-NEXT: vmul.vv v17, v8, v18 |
| ; RV32V-NEXT: vmul.vv v18, v8, v19 |
| ; RV32V-NEXT: vmul.vv v19, v8, v20 |
| ; RV32V-NEXT: vmul.vv v20, v8, v21 |
| ; RV32V-NEXT: vmul.vv v21, v8, v22 |
| ; RV32V-NEXT: vmul.vv v22, v8, v23 |
| ; RV32V-NEXT: vmul.vv v23, v8, v24 |
| ; RV32V-NEXT: vmul.vv v24, v8, v25 |
| ; RV32V-NEXT: vmul.vv v25, v8, v26 |
| ; RV32V-NEXT: vmul.vv v26, v8, v27 |
| ; RV32V-NEXT: vmul.vv v27, v8, v28 |
| ; RV32V-NEXT: vmul.vv v28, v8, v29 |
| ; RV32V-NEXT: vmul.vv v29, v8, v30 |
| ; RV32V-NEXT: vmul.vv v30, v8, v31 |
| ; RV32V-NEXT: vmul.vv v31, v8, v7 |
| ; RV32V-NEXT: vmul.vv v7, v8, v6 |
| ; RV32V-NEXT: vmul.vv v6, v8, v5 |
| ; RV32V-NEXT: vmul.vv v5, v8, v4 |
| ; RV32V-NEXT: vmul.vv v4, v8, v3 |
| ; RV32V-NEXT: vmul.vv v3, v8, v2 |
| ; RV32V-NEXT: vmul.vv v2, v8, v1 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v16, v16, v17 |
| ; RV32V-NEXT: addi s9, sp, 232 |
| ; RV32V-NEXT: vlse64.v v11, (s9), zero |
| ; RV32V-NEXT: csrr s9, vlenb |
| ; RV32V-NEXT: add s9, sp, s9 |
| ; RV32V-NEXT: addi s9, s9, 288 |
| ; RV32V-NEXT: vs1r.v v11, (s9) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vxor.vv v16, v16, v18 |
| ; RV32V-NEXT: addi s9, sp, 224 |
| ; RV32V-NEXT: vlse64.v v11, (s9), zero |
| ; RV32V-NEXT: addi s9, sp, 288 |
| ; RV32V-NEXT: vs1r.v v11, (s9) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vxor.vv v16, v16, v19 |
| ; RV32V-NEXT: vlse64.v v19, (s7), zero |
| ; RV32V-NEXT: vxor.vv v10, v16, v10 |
| ; RV32V-NEXT: vlse64.v v13, (s5), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v20 |
| ; RV32V-NEXT: vlse64.v v20, (s4), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v21 |
| ; RV32V-NEXT: vlse64.v v21, (s3), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v22 |
| ; RV32V-NEXT: vlse64.v v22, (s2), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v23 |
| ; RV32V-NEXT: vlse64.v v23, (s1), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v24 |
| ; RV32V-NEXT: vlse64.v v24, (s0), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v25 |
| ; RV32V-NEXT: vlse64.v v25, (t6), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v26 |
| ; RV32V-NEXT: vlse64.v v26, (t4), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v27 |
| ; RV32V-NEXT: vlse64.v v27, (t3), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v28 |
| ; RV32V-NEXT: vlse64.v v28, (t2), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v29 |
| ; RV32V-NEXT: vlse64.v v29, (t1), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v30 |
| ; RV32V-NEXT: vlse64.v v30, (t0), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v31 |
| ; RV32V-NEXT: vlse64.v v31, (a7), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v7 |
| ; RV32V-NEXT: vlse64.v v7, (a6), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v6 |
| ; RV32V-NEXT: vlse64.v v6, (a5), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v5 |
| ; RV32V-NEXT: vlse64.v v5, (a4), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v4 |
| ; RV32V-NEXT: vlse64.v v4, (a3), zero |
| ; RV32V-NEXT: vxor.vv v10, v10, v3 |
| ; RV32V-NEXT: vlse64.v v3, (a2), zero |
| ; RV32V-NEXT: vxor.vv v2, v10, v2 |
| ; RV32V-NEXT: vlse64.v v11, (a0), zero |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a2, a0, 2 |
| ; RV32V-NEXT: add a0, a2, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v1, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v14, v9, v10 |
| ; RV32V-NEXT: vand.vv v0, v9, v0 |
| ; RV32V-NEXT: vand.vv v15, v9, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a2, a0, 1 |
| ; RV32V-NEXT: add a0, a2, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v16, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v17, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v18, v9, v10 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v19, v9, v19 |
| ; RV32V-NEXT: vand.vv v10, v9, v13 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v20, v9, v20 |
| ; RV32V-NEXT: vand.vv v21, v9, v21 |
| ; RV32V-NEXT: vand.vv v22, v9, v22 |
| ; RV32V-NEXT: vand.vv v23, v9, v23 |
| ; RV32V-NEXT: vand.vv v24, v9, v24 |
| ; RV32V-NEXT: vand.vv v25, v9, v25 |
| ; RV32V-NEXT: vand.vv v26, v9, v26 |
| ; RV32V-NEXT: vand.vv v27, v9, v27 |
| ; RV32V-NEXT: vand.vv v28, v9, v28 |
| ; RV32V-NEXT: vand.vv v29, v9, v29 |
| ; RV32V-NEXT: vand.vv v30, v9, v30 |
| ; RV32V-NEXT: vand.vv v31, v9, v31 |
| ; RV32V-NEXT: vand.vv v7, v9, v7 |
| ; RV32V-NEXT: vand.vv v6, v9, v6 |
| ; RV32V-NEXT: vand.vv v5, v9, v5 |
| ; RV32V-NEXT: vlse64.v v10, (s11), zero |
| ; RV32V-NEXT: vand.vv v4, v9, v4 |
| ; RV32V-NEXT: vand.vv v3, v9, v3 |
| ; RV32V-NEXT: vand.vv v11, v9, v11 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a2, a0, 2 |
| ; RV32V-NEXT: add a0, a2, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v10, (ra), zero |
| ; RV32V-NEXT: vlse64.v v11, (t5), zero |
| ; RV32V-NEXT: vlse64.v v12, (s8), zero |
| ; RV32V-NEXT: vlse64.v v13, (s6), zero |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: vand.vv v11, v9, v11 |
| ; RV32V-NEXT: vand.vv v12, v9, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a2, a0, 1 |
| ; RV32V-NEXT: add a0, a2, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v13, v9, v13 |
| ; RV32V-NEXT: vand.vx v9, v9, a1 |
| ; RV32V-NEXT: vmul.vv v9, v8, v9 |
| ; RV32V-NEXT: vxor.vv v9, v2, v9 |
| ; RV32V-NEXT: vmul.vv v12, v8, v1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v14 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v15 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v16 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v17 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v18 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v19 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v20 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v21 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v22 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v23 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v24 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v25 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v26 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v27 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v28 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v29 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v30 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v31 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v7 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v12, v8, v3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 2 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v12, v8, v12 |
| ; RV32V-NEXT: vxor.vv v9, v9, v12 |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v11 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 1 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v13 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vxor.vv v8, v9, v10, v0.t |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 3 |
| ; RV32V-NEXT: sub a0, a1, a0 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: .cfi_def_cfa sp, 352 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: .cfi_restore ra |
| ; RV32V-NEXT: .cfi_restore s0 |
| ; RV32V-NEXT: .cfi_restore s1 |
| ; RV32V-NEXT: .cfi_restore s2 |
| ; RV32V-NEXT: .cfi_restore s3 |
| ; RV32V-NEXT: .cfi_restore s4 |
| ; RV32V-NEXT: .cfi_restore s5 |
| ; RV32V-NEXT: .cfi_restore s6 |
| ; RV32V-NEXT: .cfi_restore s7 |
| ; RV32V-NEXT: .cfi_restore s8 |
| ; RV32V-NEXT: .cfi_restore s9 |
| ; RV32V-NEXT: .cfi_restore s10 |
| ; RV32V-NEXT: .cfi_restore s11 |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i64_vv_mask: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: vsetvli a0, zero, e64, m1, ta, mu |
| ; RV64V-NEXT: vand.vi v10, v9, 2 |
| ; RV64V-NEXT: vand.vi v11, v9, 1 |
| ; RV64V-NEXT: vmul.vv v10, v8, v10 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v11, v10 |
| ; RV64V-NEXT: vand.vi v11, v9, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vi v11, v9, 8 |
| ; RV64V-NEXT: li a0, 16 |
| ; RV64V-NEXT: li a1, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a1, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a0, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a1, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: li a2, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a0, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a2 |
| ; RV64V-NEXT: slli a1, a0, 11 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 2 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 4 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 8 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 16 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 64 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 128 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 256 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 512 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 1024 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 2048 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 4096 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 8192 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 16384 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 32768 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 65536 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 131072 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: lui a1, 262144 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 31 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 32 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 33 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 34 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 35 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 36 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 37 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 38 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 39 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 40 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 41 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 42 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 43 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 44 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 45 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 46 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 47 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 48 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 49 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 50 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 51 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 52 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 53 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 54 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 55 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 56 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 57 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 58 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 59 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 60 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: slli a1, a0, 61 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a1 |
| ; RV64V-NEXT: li a1, -1 |
| ; RV64V-NEXT: slli a0, a0, 62 |
| ; RV64V-NEXT: slli a1, a1, 63 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vand.vx v11, v9, a0 |
| ; RV64V-NEXT: vand.vx v9, v9, a1 |
| ; RV64V-NEXT: vmul.vv v11, v8, v11 |
| ; RV64V-NEXT: vxor.vv v10, v10, v11 |
| ; RV64V-NEXT: vmul.vv v9, v8, v9 |
| ; RV64V-NEXT: vxor.vv v8, v10, v9, v0.t |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i64_vv_mask: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, mu |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v9, v0.t |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i64_vv_mask: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a0, zero, e64, m1, ta, mu |
| ; RV64ZVBC64-NEXT: vclmul.vv v8, v8, v9, v0.t |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i64_vv_mask: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: .cfi_def_cfa_offset 352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: .cfi_offset ra, -4 |
| ; RV32ZVBC32-NEXT: .cfi_offset s0, -8 |
| ; RV32ZVBC32-NEXT: .cfi_offset s1, -12 |
| ; RV32ZVBC32-NEXT: .cfi_offset s2, -16 |
| ; RV32ZVBC32-NEXT: .cfi_offset s3, -20 |
| ; RV32ZVBC32-NEXT: .cfi_offset s4, -24 |
| ; RV32ZVBC32-NEXT: .cfi_offset s5, -28 |
| ; RV32ZVBC32-NEXT: .cfi_offset s6, -32 |
| ; RV32ZVBC32-NEXT: .cfi_offset s7, -36 |
| ; RV32ZVBC32-NEXT: .cfi_offset s8, -40 |
| ; RV32ZVBC32-NEXT: .cfi_offset s9, -44 |
| ; RV32ZVBC32-NEXT: .cfi_offset s10, -48 |
| ; RV32ZVBC32-NEXT: .cfi_offset s11, -52 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 3 |
| ; RV32ZVBC32-NEXT: sub a0, a1, a0 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a0 |
| ; RV32ZVBC32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xe0, 0x02, 0x22, 0x11, 0x07, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 352 + 7 * vlenb |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: lui a1, 524288 |
| ; RV32ZVBC32-NEXT: li t5, 1 |
| ; RV32ZVBC32-NEXT: li a4, 2 |
| ; RV32ZVBC32-NEXT: li a2, 4 |
| ; RV32ZVBC32-NEXT: li ra, 8 |
| ; RV32ZVBC32-NEXT: li a0, 16 |
| ; RV32ZVBC32-NEXT: li s11, 32 |
| ; RV32ZVBC32-NEXT: li s8, 64 |
| ; RV32ZVBC32-NEXT: li s9, 128 |
| ; RV32ZVBC32-NEXT: li s10, 256 |
| ; RV32ZVBC32-NEXT: li s6, 512 |
| ; RV32ZVBC32-NEXT: li s1, 1024 |
| ; RV32ZVBC32-NEXT: lui s7, 1 |
| ; RV32ZVBC32-NEXT: lui s5, 2 |
| ; RV32ZVBC32-NEXT: lui s4, 4 |
| ; RV32ZVBC32-NEXT: lui s3, 8 |
| ; RV32ZVBC32-NEXT: lui s2, 16 |
| ; RV32ZVBC32-NEXT: lui s0, 32 |
| ; RV32ZVBC32-NEXT: lui t6, 64 |
| ; RV32ZVBC32-NEXT: lui t4, 128 |
| ; RV32ZVBC32-NEXT: lui t3, 256 |
| ; RV32ZVBC32-NEXT: lui t2, 512 |
| ; RV32ZVBC32-NEXT: lui t1, 1024 |
| ; RV32ZVBC32-NEXT: lui t0, 2048 |
| ; RV32ZVBC32-NEXT: lui a7, 4096 |
| ; RV32ZVBC32-NEXT: lui a6, 8192 |
| ; RV32ZVBC32-NEXT: lui a5, 16384 |
| ; RV32ZVBC32-NEXT: lui a3, 32768 |
| ; RV32ZVBC32-NEXT: sw a1, 16(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 268(sp) |
| ; RV32ZVBC32-NEXT: lui a4, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 260(sp) |
| ; RV32ZVBC32-NEXT: lui a2, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 252(sp) |
| ; RV32ZVBC32-NEXT: vsetvli ra, zero, e64, m1, ta, mu |
| ; RV32ZVBC32-NEXT: vand.vi v13, v9, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v14, v9, 1 |
| ; RV32ZVBC32-NEXT: vand.vi v12, v9, 4 |
| ; RV32ZVBC32-NEXT: vand.vi v11, v9, 8 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 244(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v0, v9, a0 |
| ; RV32ZVBC32-NEXT: addi ra, sp, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 236(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v15, v9, s11 |
| ; RV32ZVBC32-NEXT: addi s11, sp, 272 |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 228(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v16, v9, s8 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 264 |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 220(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v17, v9, s9 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 212(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v18, v9, s10 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 248 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 204(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v19, v9, s6 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 240 |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 196(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v20, v9, s1 |
| ; RV32ZVBC32-NEXT: slli t5, t5, 11 |
| ; RV32ZVBC32-NEXT: vand.vx v21, v9, s7 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 180(sp) |
| ; RV32ZVBC32-NEXT: vand.vx v22, v9, s5 |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 172(sp) |
| ; RV32ZVBC32-NEXT: addi s7, sp, 216 |
| ; RV32ZVBC32-NEXT: vand.vx v23, v9, s4 |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 164(sp) |
| ; RV32ZVBC32-NEXT: addi s5, sp, 208 |
| ; RV32ZVBC32-NEXT: vand.vx v24, v9, s3 |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 156(sp) |
| ; RV32ZVBC32-NEXT: addi s4, sp, 200 |
| ; RV32ZVBC32-NEXT: vand.vx v25, v9, s2 |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw s2, 148(sp) |
| ; RV32ZVBC32-NEXT: addi s3, sp, 192 |
| ; RV32ZVBC32-NEXT: vand.vx v26, v9, s0 |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 140(sp) |
| ; RV32ZVBC32-NEXT: addi s2, sp, 184 |
| ; RV32ZVBC32-NEXT: vand.vx v27, v9, t6 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 132(sp) |
| ; RV32ZVBC32-NEXT: addi s1, sp, 176 |
| ; RV32ZVBC32-NEXT: vand.vx v28, v9, t4 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 124(sp) |
| ; RV32ZVBC32-NEXT: addi s0, sp, 168 |
| ; RV32ZVBC32-NEXT: vand.vx v29, v9, t3 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 116(sp) |
| ; RV32ZVBC32-NEXT: addi t6, sp, 160 |
| ; RV32ZVBC32-NEXT: vand.vx v30, v9, t2 |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 108(sp) |
| ; RV32ZVBC32-NEXT: addi t4, sp, 152 |
| ; RV32ZVBC32-NEXT: vand.vx v31, v9, t1 |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 100(sp) |
| ; RV32ZVBC32-NEXT: addi t3, sp, 144 |
| ; RV32ZVBC32-NEXT: vand.vx v7, v9, t0 |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 92(sp) |
| ; RV32ZVBC32-NEXT: addi t2, sp, 136 |
| ; RV32ZVBC32-NEXT: vand.vx v6, v9, a7 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 84(sp) |
| ; RV32ZVBC32-NEXT: addi t1, sp, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v5, v9, a6 |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 76(sp) |
| ; RV32ZVBC32-NEXT: addi t0, sp, 120 |
| ; RV32ZVBC32-NEXT: vand.vx v4, v9, a5 |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 68(sp) |
| ; RV32ZVBC32-NEXT: addi a7, sp, 112 |
| ; RV32ZVBC32-NEXT: vand.vx v3, v9, a3 |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 60(sp) |
| ; RV32ZVBC32-NEXT: addi a6, sp, 104 |
| ; RV32ZVBC32-NEXT: vand.vx v2, v9, a4 |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 52(sp) |
| ; RV32ZVBC32-NEXT: addi a5, sp, 96 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a2 |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 44(sp) |
| ; RV32ZVBC32-NEXT: addi a4, sp, 88 |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 262144 |
| ; RV32ZVBC32-NEXT: sw a1, 36(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: lui a0, 524288 |
| ; RV32ZVBC32-NEXT: sw a0, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a3, sp, 80 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v13 |
| ; RV32ZVBC32-NEXT: vmul.vv v13, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v13, v13, v10 |
| ; RV32ZVBC32-NEXT: vand.vx v10, v9, t5 |
| ; RV32ZVBC32-NEXT: addi a2, sp, 72 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v13, v13, v12 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (ra), zero |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, t5, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi a0, sp, 64 |
| ; RV32ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v13, v13, v11 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s11), zero |
| ; RV32ZVBC32-NEXT: csrr t5, vlenb |
| ; RV32ZVBC32-NEXT: slli t5, t5, 2 |
| ; RV32ZVBC32-NEXT: add t5, sp, t5 |
| ; RV32ZVBC32-NEXT: addi t5, t5, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s11, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v14, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v13, v13, v14 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (s8), zero |
| ; RV32ZVBC32-NEXT: addi ra, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v14, v8, v15 |
| ; RV32ZVBC32-NEXT: vxor.vv v14, v13, v14 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s9), zero |
| ; RV32ZVBC32-NEXT: addi t5, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v15, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v15, v14, v15 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s10), zero |
| ; RV32ZVBC32-NEXT: csrr s8, vlenb |
| ; RV32ZVBC32-NEXT: slli s9, s8, 1 |
| ; RV32ZVBC32-NEXT: add s8, s9, s8 |
| ; RV32ZVBC32-NEXT: add s8, sp, s8 |
| ; RV32ZVBC32-NEXT: addi s8, s8, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (s8) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s8, sp, 32 |
| ; RV32ZVBC32-NEXT: vmul.vv v16, v8, v17 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v15, v16 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s6), zero |
| ; RV32ZVBC32-NEXT: csrr s6, vlenb |
| ; RV32ZVBC32-NEXT: slli s6, s6, 1 |
| ; RV32ZVBC32-NEXT: add s6, sp, s6 |
| ; RV32ZVBC32-NEXT: addi s6, s6, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (s6) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi s6, sp, 24 |
| ; RV32ZVBC32-NEXT: vmul.vv v17, v8, v18 |
| ; RV32ZVBC32-NEXT: vmul.vv v18, v8, v19 |
| ; RV32ZVBC32-NEXT: vmul.vv v19, v8, v20 |
| ; RV32ZVBC32-NEXT: vmul.vv v20, v8, v21 |
| ; RV32ZVBC32-NEXT: vmul.vv v21, v8, v22 |
| ; RV32ZVBC32-NEXT: vmul.vv v22, v8, v23 |
| ; RV32ZVBC32-NEXT: vmul.vv v23, v8, v24 |
| ; RV32ZVBC32-NEXT: vmul.vv v24, v8, v25 |
| ; RV32ZVBC32-NEXT: vmul.vv v25, v8, v26 |
| ; RV32ZVBC32-NEXT: vmul.vv v26, v8, v27 |
| ; RV32ZVBC32-NEXT: vmul.vv v27, v8, v28 |
| ; RV32ZVBC32-NEXT: vmul.vv v28, v8, v29 |
| ; RV32ZVBC32-NEXT: vmul.vv v29, v8, v30 |
| ; RV32ZVBC32-NEXT: vmul.vv v30, v8, v31 |
| ; RV32ZVBC32-NEXT: vmul.vv v31, v8, v7 |
| ; RV32ZVBC32-NEXT: vmul.vv v7, v8, v6 |
| ; RV32ZVBC32-NEXT: vmul.vv v6, v8, v5 |
| ; RV32ZVBC32-NEXT: vmul.vv v5, v8, v4 |
| ; RV32ZVBC32-NEXT: vmul.vv v4, v8, v3 |
| ; RV32ZVBC32-NEXT: vmul.vv v3, v8, v2 |
| ; RV32ZVBC32-NEXT: vmul.vv v2, v8, v1 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v17 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s9), zero |
| ; RV32ZVBC32-NEXT: csrr s9, vlenb |
| ; RV32ZVBC32-NEXT: add s9, sp, s9 |
| ; RV32ZVBC32-NEXT: addi s9, s9, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (s9) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v18 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 224 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s9), zero |
| ; RV32ZVBC32-NEXT: addi s9, sp, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (s9) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vxor.vv v16, v16, v19 |
| ; RV32ZVBC32-NEXT: vlse64.v v19, (s7), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v16, v10 |
| ; RV32ZVBC32-NEXT: vlse64.v v13, (s5), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v20 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (s4), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v21 |
| ; RV32ZVBC32-NEXT: vlse64.v v21, (s3), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v22 |
| ; RV32ZVBC32-NEXT: vlse64.v v22, (s2), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v23 |
| ; RV32ZVBC32-NEXT: vlse64.v v23, (s1), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v24 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (s0), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v25 |
| ; RV32ZVBC32-NEXT: vlse64.v v25, (t6), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v26 |
| ; RV32ZVBC32-NEXT: vlse64.v v26, (t4), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v27 |
| ; RV32ZVBC32-NEXT: vlse64.v v27, (t3), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v28 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (t2), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v29 |
| ; RV32ZVBC32-NEXT: vlse64.v v29, (t1), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v30 |
| ; RV32ZVBC32-NEXT: vlse64.v v30, (t0), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v31 |
| ; RV32ZVBC32-NEXT: vlse64.v v31, (a7), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v7 |
| ; RV32ZVBC32-NEXT: vlse64.v v7, (a6), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v6 |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (a5), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v5 |
| ; RV32ZVBC32-NEXT: vlse64.v v5, (a4), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v4 |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a3), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v10, v10, v3 |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (a2), zero |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v10, v2 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (a0), zero |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a2, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v1, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v14, v9, v10 |
| ; RV32ZVBC32-NEXT: vand.vv v0, v9, v0 |
| ; RV32ZVBC32-NEXT: vand.vv v15, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a2, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v16, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v17, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v18, v9, v10 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v19, v9, v19 |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v13 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v20, v9, v20 |
| ; RV32ZVBC32-NEXT: vand.vv v21, v9, v21 |
| ; RV32ZVBC32-NEXT: vand.vv v22, v9, v22 |
| ; RV32ZVBC32-NEXT: vand.vv v23, v9, v23 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v9, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v25, v9, v25 |
| ; RV32ZVBC32-NEXT: vand.vv v26, v9, v26 |
| ; RV32ZVBC32-NEXT: vand.vv v27, v9, v27 |
| ; RV32ZVBC32-NEXT: vand.vv v28, v9, v28 |
| ; RV32ZVBC32-NEXT: vand.vv v29, v9, v29 |
| ; RV32ZVBC32-NEXT: vand.vv v30, v9, v30 |
| ; RV32ZVBC32-NEXT: vand.vv v31, v9, v31 |
| ; RV32ZVBC32-NEXT: vand.vv v7, v9, v7 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v9, v6 |
| ; RV32ZVBC32-NEXT: vand.vv v5, v9, v5 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s11), zero |
| ; RV32ZVBC32-NEXT: vand.vv v4, v9, v4 |
| ; RV32ZVBC32-NEXT: vand.vv v3, v9, v3 |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v11 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a2, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (ra), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (t5), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v13, (s6), zero |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v11 |
| ; RV32ZVBC32-NEXT: vand.vv v12, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a2, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v13, v9, v13 |
| ; RV32ZVBC32-NEXT: vand.vx v9, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v9, v8, v9 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v2, v9 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v15 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v17 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v19 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v21 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v23 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v25 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v27 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v29 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v31 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v7 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v5 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v3 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v12, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v12 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v11 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v13 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v9, v10, v0.t |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 3 |
| ; RV32ZVBC32-NEXT: sub a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: .cfi_def_cfa sp, 352 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: .cfi_restore ra |
| ; RV32ZVBC32-NEXT: .cfi_restore s0 |
| ; RV32ZVBC32-NEXT: .cfi_restore s1 |
| ; RV32ZVBC32-NEXT: .cfi_restore s2 |
| ; RV32ZVBC32-NEXT: .cfi_restore s3 |
| ; RV32ZVBC32-NEXT: .cfi_restore s4 |
| ; RV32ZVBC32-NEXT: .cfi_restore s5 |
| ; RV32ZVBC32-NEXT: .cfi_restore s6 |
| ; RV32ZVBC32-NEXT: .cfi_restore s7 |
| ; RV32ZVBC32-NEXT: .cfi_restore s8 |
| ; RV32ZVBC32-NEXT: .cfi_restore s9 |
| ; RV32ZVBC32-NEXT: .cfi_restore s10 |
| ; RV32ZVBC32-NEXT: .cfi_restore s11 |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i64_vv_mask: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: vsetvli a0, zero, e64, m1, ta, mu |
| ; RV64ZVBC32-NEXT: vand.vi v10, v9, 2 |
| ; RV64ZVBC32-NEXT: vand.vi v11, v9, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v11, v10 |
| ; RV64ZVBC32-NEXT: vand.vi v11, v9, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vi v11, v9, 8 |
| ; RV64ZVBC32-NEXT: li a0, 16 |
| ; RV64ZVBC32-NEXT: li a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: li a0, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: li a0, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: li a2, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a0, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a2 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 11 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 64 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 128 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 256 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 512 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 1024 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 2048 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 4096 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 8192 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 16384 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 32768 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 65536 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 131072 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: lui a1, 262144 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 31 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 32 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 33 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 34 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 35 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 36 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 37 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 38 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 39 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 40 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 41 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 42 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 43 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 44 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 45 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 46 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 47 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 48 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 49 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 50 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 51 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 52 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 53 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 54 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 55 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 56 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 57 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 58 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 59 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 60 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: slli a1, a0, 61 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a1 |
| ; RV64ZVBC32-NEXT: li a1, -1 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 62 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 63 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vand.vx v11, v9, a0 |
| ; RV64ZVBC32-NEXT: vand.vx v9, v9, a1 |
| ; RV64ZVBC32-NEXT: vmul.vv v11, v8, v11 |
| ; RV64ZVBC32-NEXT: vxor.vv v10, v10, v11 |
| ; RV64ZVBC32-NEXT: vmul.vv v9, v8, v9 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v10, v9, v0.t |
| ; RV64ZVBC32-NEXT: ret |
| %v = call <vscale x 1 x i64> @llvm.clmul.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) |
| %w = select <vscale x 1 x i1> %mask, <vscale x 1 x i64> %v, <vscale x 1 x i64> %va |
| ret <vscale x 1 x i64> %w |
| } |
| |
| define <vscale x 1 x i64> @clmul_nxv1i64_vx_mask(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %mask) { |
| ; RV32V-LABEL: clmul_nxv1i64_vx_mask: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: addi sp, sp, -352 |
| ; RV32V-NEXT: .cfi_def_cfa_offset 352 |
| ; RV32V-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32V-NEXT: .cfi_offset ra, -4 |
| ; RV32V-NEXT: .cfi_offset s0, -8 |
| ; RV32V-NEXT: .cfi_offset s1, -12 |
| ; RV32V-NEXT: .cfi_offset s2, -16 |
| ; RV32V-NEXT: .cfi_offset s3, -20 |
| ; RV32V-NEXT: .cfi_offset s4, -24 |
| ; RV32V-NEXT: .cfi_offset s5, -28 |
| ; RV32V-NEXT: .cfi_offset s6, -32 |
| ; RV32V-NEXT: .cfi_offset s7, -36 |
| ; RV32V-NEXT: .cfi_offset s8, -40 |
| ; RV32V-NEXT: .cfi_offset s9, -44 |
| ; RV32V-NEXT: .cfi_offset s10, -48 |
| ; RV32V-NEXT: .cfi_offset s11, -52 |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: mv a3, a2 |
| ; RV32V-NEXT: slli a2, a2, 1 |
| ; RV32V-NEXT: add a2, a2, a3 |
| ; RV32V-NEXT: sub sp, sp, a2 |
| ; RV32V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xe0, 0x02, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 352 + 6 * vlenb |
| ; RV32V-NEXT: csrr a2, vlenb |
| ; RV32V-NEXT: slli a3, a2, 2 |
| ; RV32V-NEXT: add a2, a3, a2 |
| ; RV32V-NEXT: add a2, sp, a2 |
| ; RV32V-NEXT: addi a2, a2, 288 |
| ; RV32V-NEXT: vs1r.v v0, (a2) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: sw a0, 8(sp) |
| ; RV32V-NEXT: sw a1, 12(sp) |
| ; RV32V-NEXT: addi s3, sp, 8 |
| ; RV32V-NEXT: lui s2, 524288 |
| ; RV32V-NEXT: li s11, 1 |
| ; RV32V-NEXT: li s5, 2 |
| ; RV32V-NEXT: li s7, 4 |
| ; RV32V-NEXT: li s10, 8 |
| ; RV32V-NEXT: li ra, 64 |
| ; RV32V-NEXT: li s9, 128 |
| ; RV32V-NEXT: li s8, 256 |
| ; RV32V-NEXT: li s6, 512 |
| ; RV32V-NEXT: li s4, 1024 |
| ; RV32V-NEXT: lui s1, 1 |
| ; RV32V-NEXT: lui s0, 2 |
| ; RV32V-NEXT: lui t6, 4 |
| ; RV32V-NEXT: lui t5, 8 |
| ; RV32V-NEXT: lui t4, 16 |
| ; RV32V-NEXT: lui t3, 32 |
| ; RV32V-NEXT: lui t2, 64 |
| ; RV32V-NEXT: lui t1, 128 |
| ; RV32V-NEXT: lui t0, 256 |
| ; RV32V-NEXT: lui a6, 512 |
| ; RV32V-NEXT: lui a5, 1024 |
| ; RV32V-NEXT: lui a4, 2048 |
| ; RV32V-NEXT: lui a3, 4096 |
| ; RV32V-NEXT: lui a2, 8192 |
| ; RV32V-NEXT: lui a0, 16384 |
| ; RV32V-NEXT: vsetvli a1, zero, e64, m1, ta, mu |
| ; RV32V-NEXT: vlse64.v v9, (s3), zero |
| ; RV32V-NEXT: lui s3, 32768 |
| ; RV32V-NEXT: sw s2, 16(sp) |
| ; RV32V-NEXT: lui a7, 524288 |
| ; RV32V-NEXT: sw zero, 20(sp) |
| ; RV32V-NEXT: sw zero, 272(sp) |
| ; RV32V-NEXT: sw s11, 276(sp) |
| ; RV32V-NEXT: sw zero, 264(sp) |
| ; RV32V-NEXT: sw s5, 268(sp) |
| ; RV32V-NEXT: lui s5, 65536 |
| ; RV32V-NEXT: sw zero, 256(sp) |
| ; RV32V-NEXT: sw s7, 260(sp) |
| ; RV32V-NEXT: lui s7, 131072 |
| ; RV32V-NEXT: sw zero, 248(sp) |
| ; RV32V-NEXT: sw s10, 252(sp) |
| ; RV32V-NEXT: lui a1, 262144 |
| ; RV32V-NEXT: sw zero, 240(sp) |
| ; RV32V-NEXT: li s2, 16 |
| ; RV32V-NEXT: sw s2, 244(sp) |
| ; RV32V-NEXT: li s10, 16 |
| ; RV32V-NEXT: sw zero, 232(sp) |
| ; RV32V-NEXT: li s2, 32 |
| ; RV32V-NEXT: sw s2, 236(sp) |
| ; RV32V-NEXT: sw zero, 224(sp) |
| ; RV32V-NEXT: sw ra, 228(sp) |
| ; RV32V-NEXT: sw zero, 216(sp) |
| ; RV32V-NEXT: sw s9, 220(sp) |
| ; RV32V-NEXT: sw zero, 208(sp) |
| ; RV32V-NEXT: sw s8, 212(sp) |
| ; RV32V-NEXT: li s2, 256 |
| ; RV32V-NEXT: sw zero, 200(sp) |
| ; RV32V-NEXT: sw s6, 204(sp) |
| ; RV32V-NEXT: sw zero, 192(sp) |
| ; RV32V-NEXT: sw s4, 196(sp) |
| ; RV32V-NEXT: slli s11, s11, 11 |
| ; RV32V-NEXT: sw zero, 184(sp) |
| ; RV32V-NEXT: sw s11, 188(sp) |
| ; RV32V-NEXT: sw zero, 176(sp) |
| ; RV32V-NEXT: sw s1, 180(sp) |
| ; RV32V-NEXT: sw zero, 168(sp) |
| ; RV32V-NEXT: sw s0, 172(sp) |
| ; RV32V-NEXT: sw zero, 160(sp) |
| ; RV32V-NEXT: sw t6, 164(sp) |
| ; RV32V-NEXT: sw zero, 152(sp) |
| ; RV32V-NEXT: sw t5, 156(sp) |
| ; RV32V-NEXT: lui s1, 8 |
| ; RV32V-NEXT: sw zero, 144(sp) |
| ; RV32V-NEXT: sw t4, 148(sp) |
| ; RV32V-NEXT: lui s0, 16 |
| ; RV32V-NEXT: sw zero, 136(sp) |
| ; RV32V-NEXT: sw t3, 140(sp) |
| ; RV32V-NEXT: lui t5, 32 |
| ; RV32V-NEXT: sw zero, 128(sp) |
| ; RV32V-NEXT: sw t2, 132(sp) |
| ; RV32V-NEXT: lui t4, 64 |
| ; RV32V-NEXT: sw zero, 120(sp) |
| ; RV32V-NEXT: sw t1, 124(sp) |
| ; RV32V-NEXT: lui t3, 128 |
| ; RV32V-NEXT: sw zero, 112(sp) |
| ; RV32V-NEXT: sw t0, 116(sp) |
| ; RV32V-NEXT: lui t2, 256 |
| ; RV32V-NEXT: sw zero, 104(sp) |
| ; RV32V-NEXT: sw a6, 108(sp) |
| ; RV32V-NEXT: lui t0, 512 |
| ; RV32V-NEXT: sw zero, 96(sp) |
| ; RV32V-NEXT: sw a5, 100(sp) |
| ; RV32V-NEXT: lui a6, 1024 |
| ; RV32V-NEXT: sw zero, 88(sp) |
| ; RV32V-NEXT: sw a4, 92(sp) |
| ; RV32V-NEXT: lui a5, 2048 |
| ; RV32V-NEXT: sw zero, 80(sp) |
| ; RV32V-NEXT: sw a3, 84(sp) |
| ; RV32V-NEXT: lui t1, 4096 |
| ; RV32V-NEXT: sw zero, 72(sp) |
| ; RV32V-NEXT: sw a2, 76(sp) |
| ; RV32V-NEXT: sw zero, 64(sp) |
| ; RV32V-NEXT: sw a0, 68(sp) |
| ; RV32V-NEXT: sw zero, 56(sp) |
| ; RV32V-NEXT: sw s3, 60(sp) |
| ; RV32V-NEXT: sw zero, 48(sp) |
| ; RV32V-NEXT: sw s5, 52(sp) |
| ; RV32V-NEXT: sw zero, 40(sp) |
| ; RV32V-NEXT: sw s7, 44(sp) |
| ; RV32V-NEXT: sw zero, 32(sp) |
| ; RV32V-NEXT: sw a1, 36(sp) |
| ; RV32V-NEXT: lui a3, 262144 |
| ; RV32V-NEXT: sw zero, 24(sp) |
| ; RV32V-NEXT: sw a7, 28(sp) |
| ; RV32V-NEXT: addi a1, sp, 16 |
| ; RV32V-NEXT: vlse64.v v12, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 272 |
| ; RV32V-NEXT: vlse64.v v3, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 264 |
| ; RV32V-NEXT: vlse64.v v10, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 256 |
| ; RV32V-NEXT: vlse64.v v15, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 248 |
| ; RV32V-NEXT: vlse64.v v16, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 240 |
| ; RV32V-NEXT: vlse64.v v17, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 232 |
| ; RV32V-NEXT: vlse64.v v18, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 224 |
| ; RV32V-NEXT: vlse64.v v19, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 216 |
| ; RV32V-NEXT: vlse64.v v20, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 208 |
| ; RV32V-NEXT: vlse64.v v21, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 200 |
| ; RV32V-NEXT: vlse64.v v22, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 192 |
| ; RV32V-NEXT: vlse64.v v23, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 184 |
| ; RV32V-NEXT: vlse64.v v24, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 176 |
| ; RV32V-NEXT: vlse64.v v25, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 168 |
| ; RV32V-NEXT: vlse64.v v26, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 160 |
| ; RV32V-NEXT: vlse64.v v27, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 152 |
| ; RV32V-NEXT: vlse64.v v28, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 144 |
| ; RV32V-NEXT: vlse64.v v29, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 136 |
| ; RV32V-NEXT: vlse64.v v30, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 128 |
| ; RV32V-NEXT: vlse64.v v31, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 120 |
| ; RV32V-NEXT: vlse64.v v7, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 112 |
| ; RV32V-NEXT: vlse64.v v6, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 104 |
| ; RV32V-NEXT: vlse64.v v5, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 96 |
| ; RV32V-NEXT: vlse64.v v4, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 88 |
| ; RV32V-NEXT: vlse64.v v0, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 80 |
| ; RV32V-NEXT: vlse64.v v11, (a1), zero |
| ; RV32V-NEXT: addi a1, sp, 72 |
| ; RV32V-NEXT: vlse64.v v13, (a1), zero |
| ; RV32V-NEXT: csrr a1, vlenb |
| ; RV32V-NEXT: slli a1, a1, 2 |
| ; RV32V-NEXT: add a1, sp, a1 |
| ; RV32V-NEXT: addi a1, a1, 288 |
| ; RV32V-NEXT: vs1r.v v13, (a1) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: addi ra, sp, 64 |
| ; RV32V-NEXT: vand.vi v2, v9, 2 |
| ; RV32V-NEXT: vand.vi v1, v9, 1 |
| ; RV32V-NEXT: vmul.vv v2, v8, v2 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v1, v2 |
| ; RV32V-NEXT: vand.vi v1, v9, 4 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vi v1, v9, 8 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s10 |
| ; RV32V-NEXT: addi s10, sp, 56 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: li a1, 32 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: addi s9, sp, 48 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: li a1, 64 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: addi s8, sp, 40 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: li a1, 128 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: addi s6, sp, 32 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s2 |
| ; RV32V-NEXT: addi s4, sp, 24 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: li a1, 512 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: li a1, 1024 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s11 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: lui a1, 1 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: lui a1, 2 |
| ; RV32V-NEXT: vand.vx v1, v9, a1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t6 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s0 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t5 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t4 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t3 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t2 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t0 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, a6 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, a5 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, t1 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, a2 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, a0 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s3 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s5 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vand.vx v1, v9, s7 |
| ; RV32V-NEXT: vmul.vv v1, v8, v1 |
| ; RV32V-NEXT: vxor.vv v2, v2, v1 |
| ; RV32V-NEXT: vlse64.v v1, (ra), zero |
| ; RV32V-NEXT: vand.vv v12, v9, v12 |
| ; RV32V-NEXT: vand.vv v13, v9, v3 |
| ; RV32V-NEXT: vand.vv v14, v9, v10 |
| ; RV32V-NEXT: vand.vv v15, v9, v15 |
| ; RV32V-NEXT: vand.vv v16, v9, v16 |
| ; RV32V-NEXT: vand.vv v17, v9, v17 |
| ; RV32V-NEXT: vand.vv v18, v9, v18 |
| ; RV32V-NEXT: vand.vv v19, v9, v19 |
| ; RV32V-NEXT: vand.vv v20, v9, v20 |
| ; RV32V-NEXT: vand.vv v21, v9, v21 |
| ; RV32V-NEXT: vand.vv v22, v9, v22 |
| ; RV32V-NEXT: vand.vv v23, v9, v23 |
| ; RV32V-NEXT: vand.vv v24, v9, v24 |
| ; RV32V-NEXT: vand.vv v25, v9, v25 |
| ; RV32V-NEXT: vand.vv v26, v9, v26 |
| ; RV32V-NEXT: vand.vv v27, v9, v27 |
| ; RV32V-NEXT: vand.vv v28, v9, v28 |
| ; RV32V-NEXT: vand.vv v29, v9, v29 |
| ; RV32V-NEXT: vand.vv v30, v9, v30 |
| ; RV32V-NEXT: vand.vv v31, v9, v31 |
| ; RV32V-NEXT: vand.vv v7, v9, v7 |
| ; RV32V-NEXT: vand.vv v6, v9, v6 |
| ; RV32V-NEXT: vand.vv v5, v9, v5 |
| ; RV32V-NEXT: vand.vv v4, v9, v4 |
| ; RV32V-NEXT: vand.vv v0, v9, v0 |
| ; RV32V-NEXT: vlse64.v v3, (s10), zero |
| ; RV32V-NEXT: vand.vv v10, v9, v11 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v10, v9, v1 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 1 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v10, v9, v3 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vlse64.v v3, (s9), zero |
| ; RV32V-NEXT: vlse64.v v1, (s8), zero |
| ; RV32V-NEXT: vlse64.v v10, (s6), zero |
| ; RV32V-NEXT: vlse64.v v11, (s4), zero |
| ; RV32V-NEXT: vand.vv v3, v9, v3 |
| ; RV32V-NEXT: vand.vv v1, v9, v1 |
| ; RV32V-NEXT: vand.vv v10, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32V-NEXT: vand.vv v11, v9, v11 |
| ; RV32V-NEXT: vand.vx v9, v9, a3 |
| ; RV32V-NEXT: vmul.vv v9, v8, v9 |
| ; RV32V-NEXT: vxor.vv v9, v2, v9 |
| ; RV32V-NEXT: vmul.vv v10, v8, v12 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v13 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v14 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v15 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v16 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v17 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v18 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v19 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v20 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v21 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v22 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v23 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v24 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v25 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v26 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v27 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v28 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v29 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v30 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v31 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v7 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v6 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v5 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v4 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v0 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: addi a0, sp, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 1 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 2 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v3 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v1 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vmul.vv v10, v8, v10 |
| ; RV32V-NEXT: vxor.vv v9, v9, v10 |
| ; RV32V-NEXT: vmul.vv v10, v8, v11 |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a1, a0, 2 |
| ; RV32V-NEXT: add a0, a1, a0 |
| ; RV32V-NEXT: add a0, sp, a0 |
| ; RV32V-NEXT: addi a0, a0, 288 |
| ; RV32V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload |
| ; RV32V-NEXT: vxor.vv v8, v9, v10, v0.t |
| ; RV32V-NEXT: csrr a0, vlenb |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: mv a1, a0 |
| ; RV32V-NEXT: slli a0, a0, 1 |
| ; RV32V-NEXT: add a0, a0, a1 |
| ; RV32V-NEXT: add sp, sp, a0 |
| ; RV32V-NEXT: .cfi_def_cfa sp, 352 |
| ; RV32V-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32V-NEXT: .cfi_restore ra |
| ; RV32V-NEXT: .cfi_restore s0 |
| ; RV32V-NEXT: .cfi_restore s1 |
| ; RV32V-NEXT: .cfi_restore s2 |
| ; RV32V-NEXT: .cfi_restore s3 |
| ; RV32V-NEXT: .cfi_restore s4 |
| ; RV32V-NEXT: .cfi_restore s5 |
| ; RV32V-NEXT: .cfi_restore s6 |
| ; RV32V-NEXT: .cfi_restore s7 |
| ; RV32V-NEXT: .cfi_restore s8 |
| ; RV32V-NEXT: .cfi_restore s9 |
| ; RV32V-NEXT: .cfi_restore s10 |
| ; RV32V-NEXT: .cfi_restore s11 |
| ; RV32V-NEXT: addi sp, sp, 352 |
| ; RV32V-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64V-LABEL: clmul_nxv1i64_vx_mask: |
| ; RV64V: # %bb.0: |
| ; RV64V-NEXT: andi a1, a0, 2 |
| ; RV64V-NEXT: andi a2, a0, 1 |
| ; RV64V-NEXT: vsetvli a3, zero, e64, m1, ta, mu |
| ; RV64V-NEXT: vmul.vx v9, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 4 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 8 |
| ; RV64V-NEXT: vxor.vv v9, v10, v9 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 16 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 32 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 64 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 128 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a1, a0, 256 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: andi a2, a0, 512 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: andi a3, a0, 1024 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: li a1, 1 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a3 |
| ; RV64V-NEXT: slli a2, a1, 11 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 1 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 2 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 4 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 8 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 16 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 64 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 128 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 256 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 512 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 1024 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 2048 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 4096 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 8192 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 16384 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 32768 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 65536 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 131072 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: lui a2, 262144 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: srliw a2, a0, 31 |
| ; RV64V-NEXT: slli a2, a2, 31 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 32 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 33 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 34 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 35 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 36 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 37 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 38 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 39 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 40 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 41 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 42 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 43 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 44 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 45 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 46 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 47 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 48 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 49 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 50 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 51 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 52 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 53 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 54 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 55 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 56 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 57 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 58 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 59 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 60 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: slli a2, a1, 61 |
| ; RV64V-NEXT: slli a1, a1, 62 |
| ; RV64V-NEXT: and a2, a0, a2 |
| ; RV64V-NEXT: and a1, a0, a1 |
| ; RV64V-NEXT: srli a0, a0, 63 |
| ; RV64V-NEXT: slli a0, a0, 63 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a2 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a1 |
| ; RV64V-NEXT: vxor.vv v9, v9, v10 |
| ; RV64V-NEXT: vmul.vx v10, v8, a0 |
| ; RV64V-NEXT: vxor.vv v8, v9, v10, v0.t |
| ; RV64V-NEXT: ret |
| ; |
| ; RV32ZVBC64-LABEL: clmul_nxv1i64_vx_mask: |
| ; RV32ZVBC64: # %bb.0: |
| ; RV32ZVBC64-NEXT: addi sp, sp, -16 |
| ; RV32ZVBC64-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVBC64-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC64-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC64-NEXT: addi a0, sp, 8 |
| ; RV32ZVBC64-NEXT: vsetvli a1, zero, e64, m1, ta, mu |
| ; RV32ZVBC64-NEXT: vlse64.v v9, (a0), zero |
| ; RV32ZVBC64-NEXT: vclmul.vv v8, v8, v9, v0.t |
| ; RV32ZVBC64-NEXT: addi sp, sp, 16 |
| ; RV32ZVBC64-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32ZVBC64-NEXT: ret |
| ; |
| ; RV64ZVBC64-LABEL: clmul_nxv1i64_vx_mask: |
| ; RV64ZVBC64: # %bb.0: |
| ; RV64ZVBC64-NEXT: vsetvli a1, zero, e64, m1, ta, mu |
| ; RV64ZVBC64-NEXT: vclmul.vx v8, v8, a0, v0.t |
| ; RV64ZVBC64-NEXT: ret |
| ; |
| ; RV32ZVBC32-LABEL: clmul_nxv1i64_vx_mask: |
| ; RV32ZVBC32: # %bb.0: |
| ; RV32ZVBC32-NEXT: addi sp, sp, -352 |
| ; RV32ZVBC32-NEXT: .cfi_def_cfa_offset 352 |
| ; RV32ZVBC32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill |
| ; RV32ZVBC32-NEXT: .cfi_offset ra, -4 |
| ; RV32ZVBC32-NEXT: .cfi_offset s0, -8 |
| ; RV32ZVBC32-NEXT: .cfi_offset s1, -12 |
| ; RV32ZVBC32-NEXT: .cfi_offset s2, -16 |
| ; RV32ZVBC32-NEXT: .cfi_offset s3, -20 |
| ; RV32ZVBC32-NEXT: .cfi_offset s4, -24 |
| ; RV32ZVBC32-NEXT: .cfi_offset s5, -28 |
| ; RV32ZVBC32-NEXT: .cfi_offset s6, -32 |
| ; RV32ZVBC32-NEXT: .cfi_offset s7, -36 |
| ; RV32ZVBC32-NEXT: .cfi_offset s8, -40 |
| ; RV32ZVBC32-NEXT: .cfi_offset s9, -44 |
| ; RV32ZVBC32-NEXT: .cfi_offset s10, -48 |
| ; RV32ZVBC32-NEXT: .cfi_offset s11, -52 |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: mv a3, a2 |
| ; RV32ZVBC32-NEXT: slli a2, a2, 1 |
| ; RV32ZVBC32-NEXT: add a2, a2, a3 |
| ; RV32ZVBC32-NEXT: sub sp, sp, a2 |
| ; RV32ZVBC32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xe0, 0x02, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 352 + 6 * vlenb |
| ; RV32ZVBC32-NEXT: csrr a2, vlenb |
| ; RV32ZVBC32-NEXT: slli a3, a2, 2 |
| ; RV32ZVBC32-NEXT: add a2, a3, a2 |
| ; RV32ZVBC32-NEXT: add a2, sp, a2 |
| ; RV32ZVBC32-NEXT: addi a2, a2, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v0, (a2) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: sw a0, 8(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 12(sp) |
| ; RV32ZVBC32-NEXT: addi s3, sp, 8 |
| ; RV32ZVBC32-NEXT: lui s2, 524288 |
| ; RV32ZVBC32-NEXT: li s11, 1 |
| ; RV32ZVBC32-NEXT: li s5, 2 |
| ; RV32ZVBC32-NEXT: li s7, 4 |
| ; RV32ZVBC32-NEXT: li s10, 8 |
| ; RV32ZVBC32-NEXT: li ra, 64 |
| ; RV32ZVBC32-NEXT: li s9, 128 |
| ; RV32ZVBC32-NEXT: li s8, 256 |
| ; RV32ZVBC32-NEXT: li s6, 512 |
| ; RV32ZVBC32-NEXT: li s4, 1024 |
| ; RV32ZVBC32-NEXT: lui s1, 1 |
| ; RV32ZVBC32-NEXT: lui s0, 2 |
| ; RV32ZVBC32-NEXT: lui t6, 4 |
| ; RV32ZVBC32-NEXT: lui t5, 8 |
| ; RV32ZVBC32-NEXT: lui t4, 16 |
| ; RV32ZVBC32-NEXT: lui t3, 32 |
| ; RV32ZVBC32-NEXT: lui t2, 64 |
| ; RV32ZVBC32-NEXT: lui t1, 128 |
| ; RV32ZVBC32-NEXT: lui t0, 256 |
| ; RV32ZVBC32-NEXT: lui a6, 512 |
| ; RV32ZVBC32-NEXT: lui a5, 1024 |
| ; RV32ZVBC32-NEXT: lui a4, 2048 |
| ; RV32ZVBC32-NEXT: lui a3, 4096 |
| ; RV32ZVBC32-NEXT: lui a2, 8192 |
| ; RV32ZVBC32-NEXT: lui a0, 16384 |
| ; RV32ZVBC32-NEXT: vsetvli a1, zero, e64, m1, ta, mu |
| ; RV32ZVBC32-NEXT: vlse64.v v9, (s3), zero |
| ; RV32ZVBC32-NEXT: lui s3, 32768 |
| ; RV32ZVBC32-NEXT: sw s2, 16(sp) |
| ; RV32ZVBC32-NEXT: lui a7, 524288 |
| ; RV32ZVBC32-NEXT: sw zero, 20(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 272(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 276(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 264(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 268(sp) |
| ; RV32ZVBC32-NEXT: lui s5, 65536 |
| ; RV32ZVBC32-NEXT: sw zero, 256(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 260(sp) |
| ; RV32ZVBC32-NEXT: lui s7, 131072 |
| ; RV32ZVBC32-NEXT: sw zero, 248(sp) |
| ; RV32ZVBC32-NEXT: sw s10, 252(sp) |
| ; RV32ZVBC32-NEXT: lui a1, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 240(sp) |
| ; RV32ZVBC32-NEXT: li s2, 16 |
| ; RV32ZVBC32-NEXT: sw s2, 244(sp) |
| ; RV32ZVBC32-NEXT: li s10, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 232(sp) |
| ; RV32ZVBC32-NEXT: li s2, 32 |
| ; RV32ZVBC32-NEXT: sw s2, 236(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 224(sp) |
| ; RV32ZVBC32-NEXT: sw ra, 228(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 216(sp) |
| ; RV32ZVBC32-NEXT: sw s9, 220(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 208(sp) |
| ; RV32ZVBC32-NEXT: sw s8, 212(sp) |
| ; RV32ZVBC32-NEXT: li s2, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 200(sp) |
| ; RV32ZVBC32-NEXT: sw s6, 204(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 192(sp) |
| ; RV32ZVBC32-NEXT: sw s4, 196(sp) |
| ; RV32ZVBC32-NEXT: slli s11, s11, 11 |
| ; RV32ZVBC32-NEXT: sw zero, 184(sp) |
| ; RV32ZVBC32-NEXT: sw s11, 188(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 176(sp) |
| ; RV32ZVBC32-NEXT: sw s1, 180(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 168(sp) |
| ; RV32ZVBC32-NEXT: sw s0, 172(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 160(sp) |
| ; RV32ZVBC32-NEXT: sw t6, 164(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 152(sp) |
| ; RV32ZVBC32-NEXT: sw t5, 156(sp) |
| ; RV32ZVBC32-NEXT: lui s1, 8 |
| ; RV32ZVBC32-NEXT: sw zero, 144(sp) |
| ; RV32ZVBC32-NEXT: sw t4, 148(sp) |
| ; RV32ZVBC32-NEXT: lui s0, 16 |
| ; RV32ZVBC32-NEXT: sw zero, 136(sp) |
| ; RV32ZVBC32-NEXT: sw t3, 140(sp) |
| ; RV32ZVBC32-NEXT: lui t5, 32 |
| ; RV32ZVBC32-NEXT: sw zero, 128(sp) |
| ; RV32ZVBC32-NEXT: sw t2, 132(sp) |
| ; RV32ZVBC32-NEXT: lui t4, 64 |
| ; RV32ZVBC32-NEXT: sw zero, 120(sp) |
| ; RV32ZVBC32-NEXT: sw t1, 124(sp) |
| ; RV32ZVBC32-NEXT: lui t3, 128 |
| ; RV32ZVBC32-NEXT: sw zero, 112(sp) |
| ; RV32ZVBC32-NEXT: sw t0, 116(sp) |
| ; RV32ZVBC32-NEXT: lui t2, 256 |
| ; RV32ZVBC32-NEXT: sw zero, 104(sp) |
| ; RV32ZVBC32-NEXT: sw a6, 108(sp) |
| ; RV32ZVBC32-NEXT: lui t0, 512 |
| ; RV32ZVBC32-NEXT: sw zero, 96(sp) |
| ; RV32ZVBC32-NEXT: sw a5, 100(sp) |
| ; RV32ZVBC32-NEXT: lui a6, 1024 |
| ; RV32ZVBC32-NEXT: sw zero, 88(sp) |
| ; RV32ZVBC32-NEXT: sw a4, 92(sp) |
| ; RV32ZVBC32-NEXT: lui a5, 2048 |
| ; RV32ZVBC32-NEXT: sw zero, 80(sp) |
| ; RV32ZVBC32-NEXT: sw a3, 84(sp) |
| ; RV32ZVBC32-NEXT: lui t1, 4096 |
| ; RV32ZVBC32-NEXT: sw zero, 72(sp) |
| ; RV32ZVBC32-NEXT: sw a2, 76(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 64(sp) |
| ; RV32ZVBC32-NEXT: sw a0, 68(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 56(sp) |
| ; RV32ZVBC32-NEXT: sw s3, 60(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 48(sp) |
| ; RV32ZVBC32-NEXT: sw s5, 52(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 40(sp) |
| ; RV32ZVBC32-NEXT: sw s7, 44(sp) |
| ; RV32ZVBC32-NEXT: sw zero, 32(sp) |
| ; RV32ZVBC32-NEXT: sw a1, 36(sp) |
| ; RV32ZVBC32-NEXT: lui a3, 262144 |
| ; RV32ZVBC32-NEXT: sw zero, 24(sp) |
| ; RV32ZVBC32-NEXT: sw a7, 28(sp) |
| ; RV32ZVBC32-NEXT: addi a1, sp, 16 |
| ; RV32ZVBC32-NEXT: vlse64.v v12, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 272 |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 264 |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 256 |
| ; RV32ZVBC32-NEXT: vlse64.v v15, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 248 |
| ; RV32ZVBC32-NEXT: vlse64.v v16, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 240 |
| ; RV32ZVBC32-NEXT: vlse64.v v17, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 232 |
| ; RV32ZVBC32-NEXT: vlse64.v v18, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 224 |
| ; RV32ZVBC32-NEXT: vlse64.v v19, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 216 |
| ; RV32ZVBC32-NEXT: vlse64.v v20, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 208 |
| ; RV32ZVBC32-NEXT: vlse64.v v21, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 200 |
| ; RV32ZVBC32-NEXT: vlse64.v v22, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 192 |
| ; RV32ZVBC32-NEXT: vlse64.v v23, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 184 |
| ; RV32ZVBC32-NEXT: vlse64.v v24, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 176 |
| ; RV32ZVBC32-NEXT: vlse64.v v25, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 168 |
| ; RV32ZVBC32-NEXT: vlse64.v v26, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 160 |
| ; RV32ZVBC32-NEXT: vlse64.v v27, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 152 |
| ; RV32ZVBC32-NEXT: vlse64.v v28, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 144 |
| ; RV32ZVBC32-NEXT: vlse64.v v29, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 136 |
| ; RV32ZVBC32-NEXT: vlse64.v v30, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 128 |
| ; RV32ZVBC32-NEXT: vlse64.v v31, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 120 |
| ; RV32ZVBC32-NEXT: vlse64.v v7, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 112 |
| ; RV32ZVBC32-NEXT: vlse64.v v6, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 104 |
| ; RV32ZVBC32-NEXT: vlse64.v v5, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 96 |
| ; RV32ZVBC32-NEXT: vlse64.v v4, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 88 |
| ; RV32ZVBC32-NEXT: vlse64.v v0, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 80 |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (a1), zero |
| ; RV32ZVBC32-NEXT: addi a1, sp, 72 |
| ; RV32ZVBC32-NEXT: vlse64.v v13, (a1), zero |
| ; RV32ZVBC32-NEXT: csrr a1, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a1, 2 |
| ; RV32ZVBC32-NEXT: add a1, sp, a1 |
| ; RV32ZVBC32-NEXT: addi a1, a1, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v13, (a1) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: addi ra, sp, 64 |
| ; RV32ZVBC32-NEXT: vand.vi v2, v9, 2 |
| ; RV32ZVBC32-NEXT: vand.vi v1, v9, 1 |
| ; RV32ZVBC32-NEXT: vmul.vv v2, v8, v2 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v1, v2 |
| ; RV32ZVBC32-NEXT: vand.vi v1, v9, 4 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vi v1, v9, 8 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s10 |
| ; RV32ZVBC32-NEXT: addi s10, sp, 56 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: li a1, 32 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: addi s9, sp, 48 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: li a1, 64 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: addi s8, sp, 40 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: li a1, 128 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: addi s6, sp, 32 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s2 |
| ; RV32ZVBC32-NEXT: addi s4, sp, 24 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: li a1, 512 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: li a1, 1024 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s11 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: lui a1, 1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: lui a1, 2 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t6 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s0 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t5 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t4 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t3 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t2 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t0 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a6 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a5 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, t1 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a2 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, a0 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s3 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s5 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vand.vx v1, v9, s7 |
| ; RV32ZVBC32-NEXT: vmul.vv v1, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v2, v2, v1 |
| ; RV32ZVBC32-NEXT: vlse64.v v1, (ra), zero |
| ; RV32ZVBC32-NEXT: vand.vv v12, v9, v12 |
| ; RV32ZVBC32-NEXT: vand.vv v13, v9, v3 |
| ; RV32ZVBC32-NEXT: vand.vv v14, v9, v10 |
| ; RV32ZVBC32-NEXT: vand.vv v15, v9, v15 |
| ; RV32ZVBC32-NEXT: vand.vv v16, v9, v16 |
| ; RV32ZVBC32-NEXT: vand.vv v17, v9, v17 |
| ; RV32ZVBC32-NEXT: vand.vv v18, v9, v18 |
| ; RV32ZVBC32-NEXT: vand.vv v19, v9, v19 |
| ; RV32ZVBC32-NEXT: vand.vv v20, v9, v20 |
| ; RV32ZVBC32-NEXT: vand.vv v21, v9, v21 |
| ; RV32ZVBC32-NEXT: vand.vv v22, v9, v22 |
| ; RV32ZVBC32-NEXT: vand.vv v23, v9, v23 |
| ; RV32ZVBC32-NEXT: vand.vv v24, v9, v24 |
| ; RV32ZVBC32-NEXT: vand.vv v25, v9, v25 |
| ; RV32ZVBC32-NEXT: vand.vv v26, v9, v26 |
| ; RV32ZVBC32-NEXT: vand.vv v27, v9, v27 |
| ; RV32ZVBC32-NEXT: vand.vv v28, v9, v28 |
| ; RV32ZVBC32-NEXT: vand.vv v29, v9, v29 |
| ; RV32ZVBC32-NEXT: vand.vv v30, v9, v30 |
| ; RV32ZVBC32-NEXT: vand.vv v31, v9, v31 |
| ; RV32ZVBC32-NEXT: vand.vv v7, v9, v7 |
| ; RV32ZVBC32-NEXT: vand.vv v6, v9, v6 |
| ; RV32ZVBC32-NEXT: vand.vv v5, v9, v5 |
| ; RV32ZVBC32-NEXT: vand.vv v4, v9, v4 |
| ; RV32ZVBC32-NEXT: vand.vv v0, v9, v0 |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (s10), zero |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v11 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v1 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v3 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vlse64.v v3, (s9), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v1, (s8), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v10, (s6), zero |
| ; RV32ZVBC32-NEXT: vlse64.v v11, (s4), zero |
| ; RV32ZVBC32-NEXT: vand.vv v3, v9, v3 |
| ; RV32ZVBC32-NEXT: vand.vv v1, v9, v1 |
| ; RV32ZVBC32-NEXT: vand.vv v10, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill |
| ; RV32ZVBC32-NEXT: vand.vv v11, v9, v11 |
| ; RV32ZVBC32-NEXT: vand.vx v9, v9, a3 |
| ; RV32ZVBC32-NEXT: vmul.vv v9, v8, v9 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v2, v9 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v12 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v13 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v14 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v15 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v16 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v17 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v18 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v19 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v20 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v21 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v22 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v23 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v24 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v25 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v26 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v27 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v28 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v29 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v30 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v31 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v7 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v6 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v5 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v4 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v0 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: addi a0, sp, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v3 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v1 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v10 |
| ; RV32ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV32ZVBC32-NEXT: vmul.vv v10, v8, v11 |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a1, a0, 2 |
| ; RV32ZVBC32-NEXT: add a0, a1, a0 |
| ; RV32ZVBC32-NEXT: add a0, sp, a0 |
| ; RV32ZVBC32-NEXT: addi a0, a0, 288 |
| ; RV32ZVBC32-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload |
| ; RV32ZVBC32-NEXT: vxor.vv v8, v9, v10, v0.t |
| ; RV32ZVBC32-NEXT: csrr a0, vlenb |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: mv a1, a0 |
| ; RV32ZVBC32-NEXT: slli a0, a0, 1 |
| ; RV32ZVBC32-NEXT: add a0, a0, a1 |
| ; RV32ZVBC32-NEXT: add sp, sp, a0 |
| ; RV32ZVBC32-NEXT: .cfi_def_cfa sp, 352 |
| ; RV32ZVBC32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload |
| ; RV32ZVBC32-NEXT: .cfi_restore ra |
| ; RV32ZVBC32-NEXT: .cfi_restore s0 |
| ; RV32ZVBC32-NEXT: .cfi_restore s1 |
| ; RV32ZVBC32-NEXT: .cfi_restore s2 |
| ; RV32ZVBC32-NEXT: .cfi_restore s3 |
| ; RV32ZVBC32-NEXT: .cfi_restore s4 |
| ; RV32ZVBC32-NEXT: .cfi_restore s5 |
| ; RV32ZVBC32-NEXT: .cfi_restore s6 |
| ; RV32ZVBC32-NEXT: .cfi_restore s7 |
| ; RV32ZVBC32-NEXT: .cfi_restore s8 |
| ; RV32ZVBC32-NEXT: .cfi_restore s9 |
| ; RV32ZVBC32-NEXT: .cfi_restore s10 |
| ; RV32ZVBC32-NEXT: .cfi_restore s11 |
| ; RV32ZVBC32-NEXT: addi sp, sp, 352 |
| ; RV32ZVBC32-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32ZVBC32-NEXT: ret |
| ; |
| ; RV64ZVBC32-LABEL: clmul_nxv1i64_vx_mask: |
| ; RV64ZVBC32: # %bb.0: |
| ; RV64ZVBC32-NEXT: andi a1, a0, 2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 1 |
| ; RV64ZVBC32-NEXT: vsetvli a3, zero, e64, m1, ta, mu |
| ; RV64ZVBC32-NEXT: vmul.vx v9, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 4 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 8 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v10, v9 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 16 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 32 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 64 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 128 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a1, a0, 256 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: andi a2, a0, 512 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: andi a3, a0, 1024 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: li a1, 1 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a3 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 11 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 64 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 128 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 256 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 512 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 1024 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 2048 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 4096 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 8192 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 16384 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 32768 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 65536 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 131072 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: lui a2, 262144 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: srliw a2, a0, 31 |
| ; RV64ZVBC32-NEXT: slli a2, a2, 31 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 32 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 33 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 34 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 35 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 36 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 37 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 38 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 39 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 40 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 41 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 42 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 43 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 44 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 45 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 46 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 47 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 48 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 49 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 50 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 51 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 52 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 53 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 54 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 55 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 56 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 57 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 58 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 59 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 60 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: slli a2, a1, 61 |
| ; RV64ZVBC32-NEXT: slli a1, a1, 62 |
| ; RV64ZVBC32-NEXT: and a2, a0, a2 |
| ; RV64ZVBC32-NEXT: and a1, a0, a1 |
| ; RV64ZVBC32-NEXT: srli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: slli a0, a0, 63 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a2 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a1 |
| ; RV64ZVBC32-NEXT: vxor.vv v9, v9, v10 |
| ; RV64ZVBC32-NEXT: vmul.vx v10, v8, a0 |
| ; RV64ZVBC32-NEXT: vxor.vv v8, v9, v10, v0.t |
| ; RV64ZVBC32-NEXT: ret |
| %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 |
| %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer |
| %v = call <vscale x 1 x i64> @llvm.clmul.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) |
| %w = select <vscale x 1 x i1> %mask, <vscale x 1 x i64> %v, <vscale x 1 x i64> %va |
| ret <vscale x 1 x i64> %w |
| } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; CHECK: {{.*}} |
| ; RV32: {{.*}} |
| ; RV32ZVBC: {{.*}} |
| ; RV64: {{.*}} |
| ; RV64ZVBC: {{.*}} |