; blob: 5c017fe72886c0eaae7e6a5d22e966e91dd81c64 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,RV64
; Carry-less multiply of two <vscale x 1 x i32> vectors via @llvm.clmul.nxv1i32,
; checked at e32 / LMUL=mf2. The expected code contains no dedicated carry-less
; multiply instruction: for each of the 32 single-bit masks (1, 2, 4, ...,
; 0x80000000) one input (v9) is ANDed with the mask, the result is multiplied by
; the other input (v8) with vmul.vv, and the product is folded into an
; accumulator (v10) with vxor.vv. Masks 1..8 use the vand.vi immediate form;
; larger masks are materialized in a0 with li / slli / lui and applied with
; vand.vx. The same assertions apply to both the riscv32 and riscv64 runs.
define <vscale x 1 x i32> @clmul_nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y) nounwind {
; CHECK-LABEL: clmul_nxv1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT: vand.vi v10, v9, 2
; CHECK-NEXT: vand.vi v11, v9, 1
; CHECK-NEXT: vmul.vv v10, v8, v10
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v11, v10
; CHECK-NEXT: vand.vi v11, v9, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vi v11, v9, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 1024
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 32
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 64
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 128
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 256
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 512
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 1024
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 2048
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 8192
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 16384
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 65536
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 262144
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vxor.vv v8, v10, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i32> @llvm.clmul.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %y)
ret <vscale x 1 x i32> %a
}
; Carry-less multiply of two <vscale x 2 x i32> vectors via @llvm.clmul.nxv2i32,
; checked at e32 / LMUL=m1. The expected code is a bit-by-bit expansion: for
; each of the 32 single-bit masks (1, 2, 4, ..., 0x80000000) one input (v9) is
; ANDed with the mask, multiplied by the other input (v8) with vmul.vv, and the
; product is XORed into an accumulator (v10). Masks 1..8 use vand.vi; larger
; masks are built in a0 with li / slli / lui and applied with vand.vx. The same
; assertions apply to both the riscv32 and riscv64 runs.
define <vscale x 2 x i32> @clmul_nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) nounwind {
; CHECK-LABEL: clmul_nxv2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT: vand.vi v10, v9, 2
; CHECK-NEXT: vand.vi v11, v9, 1
; CHECK-NEXT: vmul.vv v10, v8, v10
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v11, v10
; CHECK-NEXT: vand.vi v11, v9, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vi v11, v9, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 1024
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 32
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 64
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 128
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 256
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 512
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 1024
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 2048
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 8192
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 16384
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 65536
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 262144
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vxor.vv v8, v10, v8
; CHECK-NEXT: ret
%a = call <vscale x 2 x i32> @llvm.clmul.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
ret <vscale x 2 x i32> %a
}
; Carry-less multiply of two <vscale x 4 x i32> vectors via @llvm.clmul.nxv4i32,
; checked at e32 / LMUL=m2, so every vector operand is an even-numbered register
; group. The expected code is a bit-by-bit expansion: for each of the 32
; single-bit masks (1, 2, 4, ..., 0x80000000) one input (v10) is ANDed with the
; mask, multiplied by the other input (v8) with vmul.vv, and the product is
; XORed into an accumulator (v12). Masks 1..8 use vand.vi; larger masks are
; built in a0 with li / slli / lui and applied with vand.vx. The same assertions
; apply to both the riscv32 and riscv64 runs.
define <vscale x 4 x i32> @clmul_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) nounwind {
; CHECK-LABEL: clmul_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vand.vi v12, v10, 2
; CHECK-NEXT: vand.vi v14, v10, 1
; CHECK-NEXT: vmul.vv v12, v8, v12
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v14, v12
; CHECK-NEXT: vand.vi v14, v10, 4
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vi v14, v10, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 1024
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 32
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 64
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 128
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 256
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 512
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 1024
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 2048
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 8192
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 16384
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 65536
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 262144
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vand.vx v14, v10, a0
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vand.vx v10, v10, a0
; CHECK-NEXT: vmul.vv v14, v8, v14
; CHECK-NEXT: vxor.vv v12, v12, v14
; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vxor.vv v8, v12, v8
; CHECK-NEXT: ret
%a = call <vscale x 4 x i32> @llvm.clmul.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
ret <vscale x 4 x i32> %a
}
; Carry-less multiply of two <vscale x 8 x i32> vectors via @llvm.clmul.nxv8i32,
; checked at e32 / LMUL=m4. The intrinsic is called with the two distinct
; arguments %x and %y (as in the other element-count variants), so the test
; covers a general two-operand multiply rather than a self-multiply.
; The expected code is a bit-by-bit expansion: for each of the 32 single-bit
; masks (1, 2, 4, ..., 0x80000000) one input (v12) is ANDed with the mask,
; multiplied by the other input (v8) with vmul.vv, and the product is XORed into
; an accumulator (v16). Masks 1..8 use vand.vi; larger masks are built in a0
; with li / slli / lui and applied with vand.vx.
; NOTE(review): the assertions below follow the instruction schedule of the
; sibling tests with m4 register groups (v8/v12/v16/v20) substituted; re-run
; utils/update_llc_test_checks.py on this file to confirm them against llc.
define <vscale x 8 x i32> @clmul_nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) nounwind {
; CHECK-LABEL: clmul_nxv8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vand.vi v16, v12, 2
; CHECK-NEXT: vand.vi v20, v12, 1
; CHECK-NEXT: vmul.vv v16, v8, v16
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v20, v16
; CHECK-NEXT: vand.vi v20, v12, 4
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vi v20, v12, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 1024
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 32
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 64
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 128
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 256
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 512
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 1024
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 2048
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 8192
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 16384
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 65536
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 262144
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vand.vx v20, v12, a0
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vmul.vv v20, v8, v20
; CHECK-NEXT: vxor.vv v16, v16, v20
; CHECK-NEXT: vmul.vv v8, v8, v12
; CHECK-NEXT: vxor.vv v8, v16, v8
; CHECK-NEXT: ret
%a = call <vscale x 8 x i32> @llvm.clmul.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y)
ret <vscale x 8 x i32> %a
}
; Carry-less multiply of two <vscale x 16 x i32> vectors via
; @llvm.clmul.nxv16i32, checked at e32 / LMUL=m8, so each vector operand is an
; 8-register group and all four groups (v0, v8, v16, v24) are in use. The
; expected code is a bit-by-bit expansion: for each of the 32 single-bit masks
; (1, 2, 4, ..., 0x80000000) one input (v16) is ANDed with the mask, multiplied
; by the other input (v8) with vmul.vv, and the product is XORed into an
; accumulator (v24); v0 holds the per-bit temporary. Masks 1..8 use vand.vi;
; larger masks are built in a0 with li / slli / lui and applied with vand.vx.
; The same assertions apply to both the riscv32 and riscv64 runs.
define <vscale x 16 x i32> @clmul_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) nounwind {
; CHECK-LABEL: clmul_nxv16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vand.vi v24, v16, 2
; CHECK-NEXT: vand.vi v0, v16, 1
; CHECK-NEXT: vmul.vv v24, v8, v24
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v0, v24
; CHECK-NEXT: vand.vi v0, v16, 4
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vi v0, v16, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 1024
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 32
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 64
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 128
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 256
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 512
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 1024
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 2048
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 4096
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 8192
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 16384
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 65536
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 131072
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 262144
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vand.vx v0, v16, a0
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vmul.vv v0, v8, v0
; CHECK-NEXT: vxor.vv v24, v24, v0
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vxor.vv v8, v24, v8
; CHECK-NEXT: ret
%a = call <vscale x 16 x i32> @llvm.clmul.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
ret <vscale x 16 x i32> %a
}
define <vscale x 1 x i64> @clmul_nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y) nounwind {
; RV32-LABEL: clmul_nxv1i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -352
; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a1, a0, 3
; RV32-NEXT: sub a0, a1, a0
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: li t5, 1
; RV32-NEXT: li a4, 2
; RV32-NEXT: li a2, 4
; RV32-NEXT: li s11, 8
; RV32-NEXT: li a0, 16
; RV32-NEXT: li ra, 32
; RV32-NEXT: li s10, 64
; RV32-NEXT: li s9, 128
; RV32-NEXT: li s8, 256
; RV32-NEXT: li s7, 512
; RV32-NEXT: li s1, 1024
; RV32-NEXT: lui s6, 1
; RV32-NEXT: lui s5, 2
; RV32-NEXT: lui s4, 4
; RV32-NEXT: lui s3, 8
; RV32-NEXT: lui s2, 16
; RV32-NEXT: lui s0, 32
; RV32-NEXT: lui t6, 64
; RV32-NEXT: lui t4, 128
; RV32-NEXT: lui t3, 256
; RV32-NEXT: lui t2, 512
; RV32-NEXT: lui t1, 1024
; RV32-NEXT: lui t0, 2048
; RV32-NEXT: lui a7, 4096
; RV32-NEXT: lui a6, 8192
; RV32-NEXT: lui a5, 16384
; RV32-NEXT: lui a3, 32768
; RV32-NEXT: sw a1, 272(sp)
; RV32-NEXT: sw zero, 276(sp)
; RV32-NEXT: sw zero, 264(sp)
; RV32-NEXT: sw t5, 268(sp)
; RV32-NEXT: sw zero, 256(sp)
; RV32-NEXT: sw a4, 260(sp)
; RV32-NEXT: lui a4, 65536
; RV32-NEXT: sw zero, 248(sp)
; RV32-NEXT: sw a2, 252(sp)
; RV32-NEXT: lui a2, 131072
; RV32-NEXT: sw zero, 240(sp)
; RV32-NEXT: sw s11, 244(sp)
; RV32-NEXT: vsetvli s11, zero, e64, m1, ta, ma
; RV32-NEXT: vand.vi v13, v9, 2
; RV32-NEXT: vand.vi v14, v9, 1
; RV32-NEXT: vand.vi v12, v9, 4
; RV32-NEXT: vand.vi v11, v9, 8
; RV32-NEXT: sw zero, 232(sp)
; RV32-NEXT: sw a0, 236(sp)
; RV32-NEXT: vand.vx v10, v9, a0
; RV32-NEXT: addi s11, sp, 272
; RV32-NEXT: sw zero, 224(sp)
; RV32-NEXT: sw ra, 228(sp)
; RV32-NEXT: vand.vx v15, v9, ra
; RV32-NEXT: addi ra, sp, 264
; RV32-NEXT: sw zero, 216(sp)
; RV32-NEXT: sw s10, 220(sp)
; RV32-NEXT: vand.vx v16, v9, s10
; RV32-NEXT: addi s10, sp, 256
; RV32-NEXT: sw zero, 208(sp)
; RV32-NEXT: sw s9, 212(sp)
; RV32-NEXT: vand.vx v17, v9, s9
; RV32-NEXT: addi s9, sp, 248
; RV32-NEXT: sw zero, 200(sp)
; RV32-NEXT: sw s8, 204(sp)
; RV32-NEXT: vand.vx v18, v9, s8
; RV32-NEXT: addi s8, sp, 240
; RV32-NEXT: sw zero, 192(sp)
; RV32-NEXT: sw s7, 196(sp)
; RV32-NEXT: vand.vx v19, v9, s7
; RV32-NEXT: addi s7, sp, 232
; RV32-NEXT: sw zero, 184(sp)
; RV32-NEXT: sw s1, 188(sp)
; RV32-NEXT: vand.vx v20, v9, s1
; RV32-NEXT: slli t5, t5, 11
; RV32-NEXT: vand.vx v21, v9, s6
; RV32-NEXT: sw zero, 176(sp)
; RV32-NEXT: sw t5, 180(sp)
; RV32-NEXT: sw zero, 168(sp)
; RV32-NEXT: sw s6, 172(sp)
; RV32-NEXT: addi s6, sp, 216
; RV32-NEXT: vand.vx v22, v9, s5
; RV32-NEXT: sw zero, 160(sp)
; RV32-NEXT: sw s5, 164(sp)
; RV32-NEXT: addi s5, sp, 208
; RV32-NEXT: vand.vx v23, v9, s4
; RV32-NEXT: sw zero, 152(sp)
; RV32-NEXT: sw s4, 156(sp)
; RV32-NEXT: addi s4, sp, 200
; RV32-NEXT: vand.vx v24, v9, s3
; RV32-NEXT: sw zero, 144(sp)
; RV32-NEXT: sw s3, 148(sp)
; RV32-NEXT: addi s3, sp, 192
; RV32-NEXT: vand.vx v25, v9, s2
; RV32-NEXT: sw zero, 136(sp)
; RV32-NEXT: sw s2, 140(sp)
; RV32-NEXT: addi s2, sp, 184
; RV32-NEXT: vand.vx v26, v9, s0
; RV32-NEXT: sw zero, 128(sp)
; RV32-NEXT: sw s0, 132(sp)
; RV32-NEXT: addi s1, sp, 176
; RV32-NEXT: vand.vx v27, v9, t6
; RV32-NEXT: sw zero, 120(sp)
; RV32-NEXT: sw t6, 124(sp)
; RV32-NEXT: addi s0, sp, 168
; RV32-NEXT: vand.vx v28, v9, t4
; RV32-NEXT: sw zero, 112(sp)
; RV32-NEXT: sw t4, 116(sp)
; RV32-NEXT: addi t6, sp, 160
; RV32-NEXT: vand.vx v29, v9, t3
; RV32-NEXT: sw zero, 104(sp)
; RV32-NEXT: sw t3, 108(sp)
; RV32-NEXT: addi t4, sp, 152
; RV32-NEXT: vand.vx v30, v9, t2
; RV32-NEXT: sw zero, 96(sp)
; RV32-NEXT: sw t2, 100(sp)
; RV32-NEXT: addi t3, sp, 144
; RV32-NEXT: vand.vx v31, v9, t1
; RV32-NEXT: sw zero, 88(sp)
; RV32-NEXT: sw t1, 92(sp)
; RV32-NEXT: addi t2, sp, 136
; RV32-NEXT: vand.vx v7, v9, t0
; RV32-NEXT: sw zero, 80(sp)
; RV32-NEXT: sw t0, 84(sp)
; RV32-NEXT: addi t1, sp, 128
; RV32-NEXT: vand.vx v6, v9, a7
; RV32-NEXT: sw zero, 72(sp)
; RV32-NEXT: sw a7, 76(sp)
; RV32-NEXT: addi t0, sp, 120
; RV32-NEXT: vand.vx v5, v9, a6
; RV32-NEXT: sw zero, 64(sp)
; RV32-NEXT: sw a6, 68(sp)
; RV32-NEXT: addi a7, sp, 112
; RV32-NEXT: vand.vx v4, v9, a5
; RV32-NEXT: sw zero, 56(sp)
; RV32-NEXT: sw a5, 60(sp)
; RV32-NEXT: addi a6, sp, 104
; RV32-NEXT: vand.vx v3, v9, a3
; RV32-NEXT: sw zero, 48(sp)
; RV32-NEXT: sw a3, 52(sp)
; RV32-NEXT: addi a5, sp, 96
; RV32-NEXT: vand.vx v2, v9, a4
; RV32-NEXT: sw zero, 40(sp)
; RV32-NEXT: sw a4, 44(sp)
; RV32-NEXT: addi a4, sp, 88
; RV32-NEXT: vand.vx v1, v9, a2
; RV32-NEXT: sw zero, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: addi a3, sp, 80
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: lui a0, 262144
; RV32-NEXT: sw a0, 28(sp)
; RV32-NEXT: sw zero, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: addi a2, sp, 72
; RV32-NEXT: vand.vx v0, v9, t5
; RV32-NEXT: addi a1, sp, 64
; RV32-NEXT: vmul.vv v13, v8, v13
; RV32-NEXT: vmul.vv v14, v8, v14
; RV32-NEXT: vxor.vi v14, v14, 0
; RV32-NEXT: vxor.vv v14, v14, v13
; RV32-NEXT: vlse64.v v13, (s11), zero
; RV32-NEXT: addi s11, sp, 56
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v14, v14, v12
; RV32-NEXT: vlse64.v v12, (ra), zero
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: mv ra, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add t5, t5, ra
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs1r.v v12, (t5) # vscale x 8-byte Folded Spill
; RV32-NEXT: addi ra, sp, 48
; RV32-NEXT: vmul.vv v11, v8, v11
; RV32-NEXT: vxor.vv v14, v14, v11
; RV32-NEXT: vlse64.v v11, (s10), zero
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli s10, t5, 2
; RV32-NEXT: add t5, s10, t5
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs1r.v v11, (t5) # vscale x 8-byte Folded Spill
; RV32-NEXT: addi s10, sp, 40
; RV32-NEXT: vmul.vv v10, v8, v10
; RV32-NEXT: vxor.vv v14, v14, v10
; RV32-NEXT: vlse64.v v10, (s9), zero
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs1r.v v10, (t5) # vscale x 8-byte Folded Spill
; RV32-NEXT: addi t5, sp, 32
; RV32-NEXT: vmul.vv v15, v8, v15
; RV32-NEXT: vxor.vv v15, v14, v15
; RV32-NEXT: vlse64.v v10, (s8), zero
; RV32-NEXT: csrr s8, vlenb
; RV32-NEXT: slli s9, s8, 1
; RV32-NEXT: add s8, s9, s8
; RV32-NEXT: add s8, sp, s8
; RV32-NEXT: addi s8, s8, 288
; RV32-NEXT: vs1r.v v10, (s8) # vscale x 8-byte Folded Spill
; RV32-NEXT: addi s8, sp, 24
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v16, v15, v16
; RV32-NEXT: vlse64.v v10, (s7), zero
; RV32-NEXT: csrr s7, vlenb
; RV32-NEXT: slli s7, s7, 1
; RV32-NEXT: add s7, sp, s7
; RV32-NEXT: addi s7, s7, 288
; RV32-NEXT: vs1r.v v10, (s7) # vscale x 8-byte Folded Spill
; RV32-NEXT: addi s7, sp, 16
; RV32-NEXT: vmul.vv v17, v8, v17
; RV32-NEXT: vmul.vv v18, v8, v18
; RV32-NEXT: vmul.vv v19, v8, v19
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vmul.vv v21, v8, v21
; RV32-NEXT: vmul.vv v22, v8, v22
; RV32-NEXT: vmul.vv v23, v8, v23
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vmul.vv v25, v8, v25
; RV32-NEXT: vmul.vv v26, v8, v26
; RV32-NEXT: vmul.vv v27, v8, v27
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vmul.vv v29, v8, v29
; RV32-NEXT: vmul.vv v30, v8, v30
; RV32-NEXT: vmul.vv v31, v8, v31
; RV32-NEXT: vmul.vv v7, v8, v7
; RV32-NEXT: vmul.vv v6, v8, v6
; RV32-NEXT: vmul.vv v5, v8, v5
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vmul.vv v3, v8, v3
; RV32-NEXT: vmul.vv v2, v8, v2
; RV32-NEXT: vmul.vv v1, v8, v1
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v16, v16, v17
; RV32-NEXT: addi s9, sp, 224
; RV32-NEXT: vlse64.v v11, (s9), zero
; RV32-NEXT: vxor.vv v16, v16, v18
; RV32-NEXT: vlse64.v v10, (s6), zero
; RV32-NEXT: csrr s6, vlenb
; RV32-NEXT: add s6, sp, s6
; RV32-NEXT: addi s6, s6, 288
; RV32-NEXT: vs1r.v v10, (s6) # vscale x 8-byte Folded Spill
; RV32-NEXT: vxor.vv v16, v16, v19
; RV32-NEXT: vlse64.v v10, (s5), zero
; RV32-NEXT: addi s5, sp, 288
; RV32-NEXT: vs1r.v v10, (s5) # vscale x 8-byte Folded Spill
; RV32-NEXT: vxor.vv v16, v16, v20
; RV32-NEXT: vlse64.v v12, (s4), zero
; RV32-NEXT: vxor.vv v16, v16, v0
; RV32-NEXT: vlse64.v v0, (s3), zero
; RV32-NEXT: vxor.vv v16, v16, v21
; RV32-NEXT: vlse64.v v21, (s2), zero
; RV32-NEXT: vxor.vv v16, v16, v22
; RV32-NEXT: vlse64.v v22, (s1), zero
; RV32-NEXT: vxor.vv v16, v16, v23
; RV32-NEXT: vlse64.v v23, (s0), zero
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: vlse64.v v24, (t6), zero
; RV32-NEXT: vxor.vv v16, v16, v25
; RV32-NEXT: vlse64.v v25, (t4), zero
; RV32-NEXT: vxor.vv v16, v16, v26
; RV32-NEXT: vlse64.v v26, (t3), zero
; RV32-NEXT: vxor.vv v16, v16, v27
; RV32-NEXT: vlse64.v v27, (t2), zero
; RV32-NEXT: vxor.vv v16, v16, v28
; RV32-NEXT: vlse64.v v28, (t1), zero
; RV32-NEXT: vxor.vv v16, v16, v29
; RV32-NEXT: vlse64.v v29, (t0), zero
; RV32-NEXT: vxor.vv v16, v16, v30
; RV32-NEXT: vlse64.v v30, (a7), zero
; RV32-NEXT: vxor.vv v16, v16, v31
; RV32-NEXT: vlse64.v v31, (a6), zero
; RV32-NEXT: vxor.vv v16, v16, v7
; RV32-NEXT: vlse64.v v7, (a5), zero
; RV32-NEXT: vxor.vv v16, v16, v6
; RV32-NEXT: vlse64.v v6, (a4), zero
; RV32-NEXT: vxor.vv v16, v16, v5
; RV32-NEXT: vlse64.v v5, (a3), zero
; RV32-NEXT: vxor.vv v16, v16, v4
; RV32-NEXT: vlse64.v v4, (a2), zero
; RV32-NEXT: vxor.vv v16, v16, v3
; RV32-NEXT: vlse64.v v3, (a1), zero
; RV32-NEXT: vxor.vv v16, v16, v2
; RV32-NEXT: vlse64.v v2, (s11), zero
; RV32-NEXT: vxor.vv v1, v16, v1
; RV32-NEXT: vlse64.v v10, (ra), zero
; RV32-NEXT: vand.vv v13, v9, v13
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl1r.v v14, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v14, v9, v14
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a2, a1, 2
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl1r.v v15, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v15, v9, v15
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl1r.v v16, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v16, v9, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl1r.v v17, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v17, v9, v17
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl1r.v v18, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v18, v9, v18
; RV32-NEXT: vand.vv v19, v9, v11
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v20, v9, v11
; RV32-NEXT: addi a1, sp, 288
; RV32-NEXT: vl1r.v v11, (a1) # vscale x 8-byte Folded Reload
; RV32-NEXT: vand.vv v11, v9, v11
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
; RV32-NEXT: vand.vv v11, v9, v12
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
; RV32-NEXT: vand.vv v0, v9, v0
; RV32-NEXT: vand.vv v21, v9, v21
; RV32-NEXT: vand.vv v22, v9, v22
; RV32-NEXT: vand.vv v23, v9, v23
; RV32-NEXT: vand.vv v24, v9, v24
; RV32-NEXT: vand.vv v25, v9, v25
; RV32-NEXT: vand.vv v26, v9, v26
; RV32-NEXT: vand.vv v27, v9, v27
; RV32-NEXT: vand.vv v28, v9, v28
; RV32-NEXT: vand.vv v29, v9, v29
; RV32-NEXT: vand.vv v30, v9, v30
; RV32-NEXT: vand.vv v31, v9, v31
; RV32-NEXT: vand.vv v7, v9, v7
; RV32-NEXT: vand.vv v6, v9, v6
; RV32-NEXT: vand.vv v5, v9, v5
; RV32-NEXT: vand.vv v4, v9, v4
; RV32-NEXT: vand.vv v11, v9, v3
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
; RV32-NEXT: vand.vv v2, v9, v2
; RV32-NEXT: vand.vv v10, v9, v10
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs1r.v v10, (a1) # vscale x 8-byte Folded Spill
; RV32-NEXT: vlse64.v v10, (s10), zero
; RV32-NEXT: vlse64.v v3, (t5), zero
; RV32-NEXT: vlse64.v v11, (s8), zero
; RV32-NEXT: vlse64.v v12, (s7), zero
; RV32-NEXT: vand.vv v10, v9, v10
; RV32-NEXT: vand.vv v3, v9, v3
; RV32-NEXT: vand.vv v11, v9, v11
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a2, a1, 2
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs1r.v v11, (a1) # vscale x 8-byte Folded Spill
; RV32-NEXT: vand.vv v12, v9, v12
; RV32-NEXT: vand.vx v9, v9, a0
; RV32-NEXT: vmul.vv v9, v8, v9
; RV32-NEXT: vxor.vv v9, v1, v9
; RV32-NEXT: vmul.vv v11, v8, v13
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v14
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v15
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v16
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v17
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v18
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v19
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v20
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
; RV32-NEXT: vmul.vv v11, v8, v11
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a1, a0, 1
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
; RV32-NEXT: vmul.vv v11, v8, v11
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v0
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v21
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v22
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v23
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v24
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v25
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v26
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v27
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v28
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v29
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v30
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v31
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v7
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v6
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v5
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v4
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
; RV32-NEXT: vmul.vv v11, v8, v11
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v11, v8, v2
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
; RV32-NEXT: vmul.vv v11, v8, v11
; RV32-NEXT: vxor.vv v9, v9, v11
; RV32-NEXT: vmul.vv v10, v8, v10
; RV32-NEXT: vxor.vv v9, v9, v10
; RV32-NEXT: vmul.vv v10, v8, v3
; RV32-NEXT: vxor.vv v9, v9, v10
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a1, a0, 2
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
; RV32-NEXT: vmul.vv v10, v8, v10
; RV32-NEXT: vxor.vv v9, v9, v10
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vxor.vv v8, v9, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a1, a0, 3
; RV32-NEXT: sub a0, a1, a0
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 352
; RV32-NEXT: ret
;
; RV64-LABEL: clmul_nxv1i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vand.vi v10, v9, 2
; RV64-NEXT: vand.vi v11, v9, 1
; RV64-NEXT: vmul.vv v10, v8, v10
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v11, v10
; RV64-NEXT: vand.vi v11, v9, 4
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vi v11, v9, 8
; RV64-NEXT: li a0, 16
; RV64-NEXT: li a1, 32
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a0
; RV64-NEXT: li a0, 64
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: li a1, 128
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a0
; RV64-NEXT: li a0, 256
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: li a1, 512
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a0
; RV64-NEXT: li a2, 1024
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: li a0, 1
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a2
; RV64-NEXT: slli a1, a0, 11
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 2
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 4
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 8
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 16
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 32
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 64
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 128
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 256
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 512
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 1024
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 2048
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 4096
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 8192
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 16384
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 32768
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 65536
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 131072
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: lui a1, 262144
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 31
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 33
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 34
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 35
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 36
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 37
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 38
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 39
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 40
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 41
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 42
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 43
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 44
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 45
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 46
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 47
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 48
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 49
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 50
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 52
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 53
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 54
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 55
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 56
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 57
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 58
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 60
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: slli a0, a0, 62
; RV64-NEXT: slli a1, a1, 63
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vand.vx v11, v9, a0
; RV64-NEXT: vand.vx v9, v9, a1
; RV64-NEXT: vmul.vv v11, v8, v11
; RV64-NEXT: vxor.vv v10, v10, v11
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vxor.vv v8, v10, v8
; RV64-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.clmul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %y)
ret <vscale x 1 x i64> %a
}
define <vscale x 2 x i64> @clmul_nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) nounwind {
; RV32-LABEL: clmul_nxv2i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -352
; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: li s2, 1
; RV32-NEXT: li a3, 2
; RV32-NEXT: li a2, 4
; RV32-NEXT: li s7, 8
; RV32-NEXT: li a0, 16
; RV32-NEXT: li s6, 32
; RV32-NEXT: li s5, 64
; RV32-NEXT: li s4, 128
; RV32-NEXT: li s1, 256
; RV32-NEXT: li s0, 512
; RV32-NEXT: li t5, 1024
; RV32-NEXT: lui ra, 1
; RV32-NEXT: lui s8, 2
; RV32-NEXT: lui s10, 4
; RV32-NEXT: lui s11, 8
; RV32-NEXT: lui s9, 16
; RV32-NEXT: lui s3, 32
; RV32-NEXT: lui t6, 64
; RV32-NEXT: lui t4, 128
; RV32-NEXT: lui t3, 256
; RV32-NEXT: lui t2, 512
; RV32-NEXT: lui t1, 1024
; RV32-NEXT: lui t0, 2048
; RV32-NEXT: lui a7, 4096
; RV32-NEXT: lui a6, 8192
; RV32-NEXT: lui a5, 16384
; RV32-NEXT: lui a4, 32768
; RV32-NEXT: sw a1, 272(sp)
; RV32-NEXT: sw zero, 276(sp)
; RV32-NEXT: sw zero, 264(sp)
; RV32-NEXT: sw s2, 268(sp)
; RV32-NEXT: sw zero, 256(sp)
; RV32-NEXT: sw a3, 260(sp)
; RV32-NEXT: lui a3, 65536
; RV32-NEXT: sw zero, 248(sp)
; RV32-NEXT: sw a2, 252(sp)
; RV32-NEXT: lui a2, 131072
; RV32-NEXT: sw zero, 240(sp)
; RV32-NEXT: sw s7, 244(sp)
; RV32-NEXT: vsetvli s7, zero, e64, m2, ta, ma
; RV32-NEXT: vand.vi v28, v10, 2
; RV32-NEXT: vand.vi v20, v10, 1
; RV32-NEXT: vand.vi v30, v10, 4
; RV32-NEXT: vand.vi v14, v10, 8
; RV32-NEXT: sw zero, 232(sp)
; RV32-NEXT: sw a0, 236(sp)
; RV32-NEXT: vand.vx v12, v10, a0
; RV32-NEXT: addi s7, sp, 272
; RV32-NEXT: sw zero, 224(sp)
; RV32-NEXT: sw s6, 228(sp)
; RV32-NEXT: vand.vx v16, v10, s6
; RV32-NEXT: addi s6, sp, 264
; RV32-NEXT: sw zero, 216(sp)
; RV32-NEXT: sw s5, 220(sp)
; RV32-NEXT: vand.vx v18, v10, s5
; RV32-NEXT: addi s5, sp, 256
; RV32-NEXT: sw zero, 208(sp)
; RV32-NEXT: sw s4, 212(sp)
; RV32-NEXT: vand.vx v0, v10, s4
; RV32-NEXT: addi s4, sp, 248
; RV32-NEXT: sw zero, 200(sp)
; RV32-NEXT: sw s1, 204(sp)
; RV32-NEXT: vand.vx v6, v10, s1
; RV32-NEXT: addi s1, sp, 240
; RV32-NEXT: sw zero, 192(sp)
; RV32-NEXT: sw s0, 196(sp)
; RV32-NEXT: vand.vx v4, v10, s0
; RV32-NEXT: addi s0, sp, 232
; RV32-NEXT: sw zero, 184(sp)
; RV32-NEXT: sw t5, 188(sp)
; RV32-NEXT: vand.vx v2, v10, t5
; RV32-NEXT: slli s2, s2, 11
; RV32-NEXT: vand.vx v24, v10, ra
; RV32-NEXT: sw zero, 176(sp)
; RV32-NEXT: sw s2, 180(sp)
; RV32-NEXT: sw zero, 168(sp)
; RV32-NEXT: sw ra, 172(sp)
; RV32-NEXT: addi t5, sp, 216
; RV32-NEXT: vand.vx v26, v10, s8
; RV32-NEXT: sw zero, 160(sp)
; RV32-NEXT: sw s8, 164(sp)
; RV32-NEXT: addi s8, sp, 208
; RV32-NEXT: vand.vx v22, v10, s10
; RV32-NEXT: sw zero, 152(sp)
; RV32-NEXT: sw s10, 156(sp)
; RV32-NEXT: addi s10, sp, 200
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vi v20, v20, 0
; RV32-NEXT: vxor.vv v20, v20, v28
; RV32-NEXT: vand.vx v28, v10, s11
; RV32-NEXT: sw zero, 144(sp)
; RV32-NEXT: sw s11, 148(sp)
; RV32-NEXT: addi s11, sp, 192
; RV32-NEXT: vmul.vv v30, v8, v30
; RV32-NEXT: vxor.vv v20, v20, v30
; RV32-NEXT: vand.vx v30, v10, s9
; RV32-NEXT: sw zero, 136(sp)
; RV32-NEXT: sw s9, 140(sp)
; RV32-NEXT: addi s9, sp, 184
; RV32-NEXT: vmul.vv v14, v8, v14
; RV32-NEXT: vxor.vv v14, v20, v14
; RV32-NEXT: vand.vx v20, v10, s3
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv ra, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, ra
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs2r.v v20, (a0) # vscale x 16-byte Folded Spill
; RV32-NEXT: sw zero, 128(sp)
; RV32-NEXT: sw s3, 132(sp)
; RV32-NEXT: addi s3, sp, 176
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v12, v14, v12
; RV32-NEXT: vand.vx v14, v10, t6
; RV32-NEXT: sw zero, 120(sp)
; RV32-NEXT: sw t6, 124(sp)
; RV32-NEXT: addi t6, sp, 168
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: vand.vx v16, v10, t4
; RV32-NEXT: sw zero, 112(sp)
; RV32-NEXT: sw t4, 116(sp)
; RV32-NEXT: addi t4, sp, 160
; RV32-NEXT: vmul.vv v18, v8, v18
; RV32-NEXT: vxor.vv v18, v12, v18
; RV32-NEXT: vand.vx v12, v10, t3
; RV32-NEXT: sw zero, 104(sp)
; RV32-NEXT: sw t3, 108(sp)
; RV32-NEXT: addi t3, sp, 152
; RV32-NEXT: vmul.vv v20, v8, v0
; RV32-NEXT: vxor.vv v18, v18, v20
; RV32-NEXT: vand.vx v20, v10, t2
; RV32-NEXT: sw zero, 96(sp)
; RV32-NEXT: sw t2, 100(sp)
; RV32-NEXT: addi t2, sp, 144
; RV32-NEXT: vmul.vv v6, v8, v6
; RV32-NEXT: vxor.vv v18, v18, v6
; RV32-NEXT: vand.vx v6, v10, t1
; RV32-NEXT: sw zero, 88(sp)
; RV32-NEXT: sw t1, 92(sp)
; RV32-NEXT: addi t1, sp, 136
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vxor.vv v18, v18, v4
; RV32-NEXT: vand.vx v4, v10, t0
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv ra, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add ra, ra, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, ra
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs2r.v v4, (a0) # vscale x 16-byte Folded Spill
; RV32-NEXT: sw zero, 80(sp)
; RV32-NEXT: sw t0, 84(sp)
; RV32-NEXT: addi t0, sp, 128
; RV32-NEXT: vmul.vv v2, v8, v2
; RV32-NEXT: vxor.vv v18, v18, v2
; RV32-NEXT: vand.vx v2, v10, s2
; RV32-NEXT: addi ra, sp, 120
; RV32-NEXT: vmul.vv v2, v8, v2
; RV32-NEXT: vxor.vv v18, v18, v2
; RV32-NEXT: vand.vx v2, v10, a7
; RV32-NEXT: sw zero, 72(sp)
; RV32-NEXT: sw a7, 76(sp)
; RV32-NEXT: addi a7, sp, 112
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v18, v18, v24
; RV32-NEXT: vand.vx v4, v10, a6
; RV32-NEXT: sw zero, 64(sp)
; RV32-NEXT: sw a6, 68(sp)
; RV32-NEXT: addi a6, sp, 104
; RV32-NEXT: vmul.vv v26, v8, v26
; RV32-NEXT: vxor.vv v18, v18, v26
; RV32-NEXT: vand.vx v26, v10, a5
; RV32-NEXT: sw zero, 56(sp)
; RV32-NEXT: sw a5, 60(sp)
; RV32-NEXT: addi a5, sp, 96
; RV32-NEXT: vmul.vv v22, v8, v22
; RV32-NEXT: vxor.vv v18, v18, v22
; RV32-NEXT: vand.vx v24, v10, a4
; RV32-NEXT: sw zero, 48(sp)
; RV32-NEXT: sw a4, 52(sp)
; RV32-NEXT: addi a4, sp, 88
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vxor.vv v18, v18, v28
; RV32-NEXT: vand.vx v28, v10, a3
; RV32-NEXT: sw zero, 40(sp)
; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: addi a3, sp, 80
; RV32-NEXT: vmul.vv v30, v8, v30
; RV32-NEXT: vxor.vv v18, v18, v30
; RV32-NEXT: vand.vx v30, v10, a2
; RV32-NEXT: sw zero, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: addi a2, sp, 72
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: lui a0, 262144
; RV32-NEXT: sw a0, 28(sp)
; RV32-NEXT: sw zero, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: addi a1, sp, 64
; RV32-NEXT: sw a6, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 3
; RV32-NEXT: mv s2, a6
; RV32-NEXT: slli a6, a6, 2
; RV32-NEXT: add a6, a6, s2
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vl2r.v v22, (a6) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v0, v8, v22
; RV32-NEXT: vxor.vv v0, v18, v0
; RV32-NEXT: vlse64.v v18, (s7), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 3
; RV32-NEXT: mv s2, a6
; RV32-NEXT: slli a6, a6, 2
; RV32-NEXT: add a6, a6, s2
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill
; RV32-NEXT: addi s7, sp, 56
; RV32-NEXT: vmul.vv v14, v8, v14
; RV32-NEXT: vxor.vv v14, v0, v14
; RV32-NEXT: vlse64.v v18, (s6), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 2
; RV32-NEXT: mv s2, a6
; RV32-NEXT: slli a6, a6, 3
; RV32-NEXT: add a6, a6, s2
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs2r.v v18, (a6) # vscale x 16-byte Folded Spill
; RV32-NEXT: addi s2, sp, 48
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v14, v14, v16
; RV32-NEXT: vlse64.v v16, (s5), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: mv s5, a6
; RV32-NEXT: slli a6, a6, 4
; RV32-NEXT: add a6, a6, s5
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs2r.v v16, (a6) # vscale x 16-byte Folded Spill
; RV32-NEXT: addi s5, sp, 40
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v12, v14, v12
; RV32-NEXT: vlse64.v v14, (s4), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 5
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs2r.v v14, (a6) # vscale x 16-byte Folded Spill
; RV32-NEXT: addi s4, sp, 32
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vv v20, v12, v20
; RV32-NEXT: vlse64.v v12, (s1), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: mv s1, a6
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: add s1, s1, a6
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: add s1, s1, a6
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: add a6, a6, s1
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill
; RV32-NEXT: addi s1, sp, 24
; RV32-NEXT: vmul.vv v6, v8, v6
; RV32-NEXT: vxor.vv v20, v20, v6
; RV32-NEXT: vlse64.v v12, (s0), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 2
; RV32-NEXT: mv s0, a6
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: add s0, s0, a6
; RV32-NEXT: slli a6, a6, 1
; RV32-NEXT: add a6, a6, s0
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs2r.v v12, (a6) # vscale x 16-byte Folded Spill
; RV32-NEXT: addi s0, sp, 16
; RV32-NEXT: csrr s6, vlenb
; RV32-NEXT: slli s6, s6, 1
; RV32-NEXT: mv a6, s6
; RV32-NEXT: slli s6, s6, 1
; RV32-NEXT: add a6, a6, s6
; RV32-NEXT: slli s6, s6, 3
; RV32-NEXT: add s6, s6, a6
; RV32-NEXT: lw a6, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: add s6, sp, s6
; RV32-NEXT: addi s6, s6, 288
; RV32-NEXT: vl2r.v v12, (s6) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v6, v8, v12
; RV32-NEXT: vmul.vv v2, v8, v2
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vmul.vv v26, v8, v26
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vmul.vv v30, v8, v30
; RV32-NEXT: vxor.vv v20, v20, v6
; RV32-NEXT: addi s6, sp, 224
; RV32-NEXT: vlse64.v v0, (s6), zero
; RV32-NEXT: vxor.vv v20, v20, v2
; RV32-NEXT: vlse64.v v6, (t5), zero
; RV32-NEXT: vxor.vv v20, v20, v4
; RV32-NEXT: vlse64.v v22, (s8), zero
; RV32-NEXT: vxor.vv v20, v20, v26
; RV32-NEXT: vlse64.v v18, (s10), zero
; RV32-NEXT: vxor.vv v20, v20, v24
; RV32-NEXT: vlse64.v v16, (s11), zero
; RV32-NEXT: vxor.vv v20, v20, v28
; RV32-NEXT: vlse64.v v14, (s9), zero
; RV32-NEXT: vxor.vv v2, v20, v30
; RV32-NEXT: vlse64.v v12, (s3), zero
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 3
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
; RV32-NEXT: vand.vv v26, v10, v20
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 3
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
; RV32-NEXT: vand.vv v4, v10, v20
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 4
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
; RV32-NEXT: vand.vv v30, v10, v20
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 5
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vl2r.v v20, (t5) # vscale x 16-byte Folded Reload
; RV32-NEXT: vand.vv v20, v10, v20
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add s3, s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add s3, s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload
; RV32-NEXT: vand.vv v28, v10, v24
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add s3, s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vl2r.v v24, (t5) # vscale x 16-byte Folded Reload
; RV32-NEXT: vand.vv v24, v10, v24
; RV32-NEXT: vand.vv v0, v10, v0
; RV32-NEXT: vand.vv v6, v10, v6
; RV32-NEXT: vand.vv v22, v10, v22
; RV32-NEXT: vand.vv v18, v10, v18
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v16, v10, v16
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs2r.v v16, (t5) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v14, v10, v14
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add s3, s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add s3, s3, t5
; RV32-NEXT: slli t5, t5, 1
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs2r.v v14, (t5) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v12
; RV32-NEXT: csrr t5, vlenb
; RV32-NEXT: slli t5, t5, 3
; RV32-NEXT: mv s3, t5
; RV32-NEXT: slli t5, t5, 2
; RV32-NEXT: add t5, t5, s3
; RV32-NEXT: add t5, sp, t5
; RV32-NEXT: addi t5, t5, 288
; RV32-NEXT: vs2r.v v12, (t5) # vscale x 16-byte Folded Spill
; RV32-NEXT: vlse64.v v12, (t6), zero
; RV32-NEXT: vlse64.v v14, (t4), zero
; RV32-NEXT: vlse64.v v16, (t3), zero
; RV32-NEXT: vlse64.v v18, (t2), zero
; RV32-NEXT: vand.vv v12, v10, v12
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: mv t3, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v14
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: mv t3, t2
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v16
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: mv t3, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t3, t3, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v18
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: mv t3, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t3, t3, t2
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs2r.v v12, (t2) # vscale x 16-byte Folded Spill
; RV32-NEXT: vlse64.v v12, (t1), zero
; RV32-NEXT: vlse64.v v14, (t0), zero
; RV32-NEXT: vlse64.v v16, (ra), zero
; RV32-NEXT: vlse64.v v18, (a7), zero
; RV32-NEXT: vand.vv v12, v10, v12
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v14
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 4
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v16
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: add t0, t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v18
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 3
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs2r.v v12, (a7) # vscale x 16-byte Folded Spill
; RV32-NEXT: vlse64.v v12, (a6), zero
; RV32-NEXT: vlse64.v v14, (a5), zero
; RV32-NEXT: vlse64.v v16, (a4), zero
; RV32-NEXT: vlse64.v v18, (a3), zero
; RV32-NEXT: vand.vv v12, v10, v12
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 288
; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v14
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: mv a4, a3
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: add a4, a4, a3
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: add a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 288
; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: mv a4, a3
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: add a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 288
; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v18
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: mv a4, a3
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 288
; RV32-NEXT: vs2r.v v12, (a3) # vscale x 16-byte Folded Spill
; RV32-NEXT: vlse64.v v12, (a2), zero
; RV32-NEXT: vlse64.v v14, (a1), zero
; RV32-NEXT: vlse64.v v16, (s7), zero
; RV32-NEXT: vlse64.v v18, (s2), zero
; RV32-NEXT: vand.vv v12, v10, v12
; RV32-NEXT: addi a1, sp, 288
; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v14
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v12, v10, v18
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs2r.v v12, (a1) # vscale x 16-byte Folded Spill
; RV32-NEXT: vlse64.v v14, (s5), zero
; RV32-NEXT: vlse64.v v16, (s4), zero
; RV32-NEXT: vlse64.v v18, (s1), zero
; RV32-NEXT: vlse64.v v12, (s0), zero
; RV32-NEXT: vand.vv v14, v10, v14
; RV32-NEXT: vand.vv v16, v10, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs2r.v v16, (a1) # vscale x 16-byte Folded Spill
; RV32-NEXT: vand.vv v18, v10, v18
; RV32-NEXT: vand.vv v16, v10, v12
; RV32-NEXT: vand.vx v10, v10, a0
; RV32-NEXT: vmul.vv v10, v8, v10
; RV32-NEXT: vxor.vv v10, v2, v10
; RV32-NEXT: vmul.vv v12, v8, v26
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v4
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v30
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v20
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v28
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v24
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v0
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v6
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v22
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: addi a0, sp, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v14
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v12, v8, v18
; RV32-NEXT: vxor.vv v10, v10, v12
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vxor.vv v8, v10, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 352
; RV32-NEXT: ret
;
; RV64-LABEL: clmul_nxv2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vand.vi v12, v10, 2
; RV64-NEXT: vand.vi v14, v10, 1
; RV64-NEXT: vmul.vv v12, v8, v12
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v14, v12
; RV64-NEXT: vand.vi v14, v10, 4
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vi v14, v10, 8
; RV64-NEXT: li a0, 16
; RV64-NEXT: li a1, 32
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a0
; RV64-NEXT: li a0, 64
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: li a1, 128
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a0
; RV64-NEXT: li a0, 256
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: li a1, 512
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a0
; RV64-NEXT: li a2, 1024
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: li a0, 1
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a2
; RV64-NEXT: slli a1, a0, 11
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 2
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 4
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 8
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 16
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 32
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 64
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 128
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 256
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 512
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 1024
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 2048
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 4096
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 8192
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 16384
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 32768
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 65536
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 131072
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: lui a1, 262144
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 31
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 33
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 34
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 35
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 36
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 37
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 38
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 39
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 40
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 41
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 42
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 43
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 44
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 45
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 46
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 47
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 48
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 49
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 50
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 52
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 53
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 54
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 55
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 56
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 57
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 58
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 60
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: slli a0, a0, 62
; RV64-NEXT: slli a1, a1, 63
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vand.vx v14, v10, a0
; RV64-NEXT: vand.vx v10, v10, a1
; RV64-NEXT: vmul.vv v14, v8, v14
; RV64-NEXT: vxor.vv v12, v12, v14
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vxor.vv v8, v12, v8
; RV64-NEXT: ret
%a = call <vscale x 2 x i64> @llvm.clmul.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y)
ret <vscale x 2 x i64> %a
}
define <vscale x 4 x i64> @clmul_nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y) nounwind {
; RV32-LABEL: clmul_nxv4i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -352
; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: li s4, 1
; RV32-NEXT: li a3, 2
; RV32-NEXT: li a2, 4
; RV32-NEXT: li a0, 8
; RV32-NEXT: li s3, 16
; RV32-NEXT: li s2, 32
; RV32-NEXT: li s5, 64
; RV32-NEXT: li s6, 128
; RV32-NEXT: li s8, 256
; RV32-NEXT: li s1, 512
; RV32-NEXT: li s7, 1024
; RV32-NEXT: lui ra, 1
; RV32-NEXT: lui s11, 2
; RV32-NEXT: lui s10, 4
; RV32-NEXT: lui s9, 8
; RV32-NEXT: lui s0, 16
; RV32-NEXT: lui t6, 32
; RV32-NEXT: lui t5, 64
; RV32-NEXT: lui t4, 128
; RV32-NEXT: lui t3, 256
; RV32-NEXT: lui t2, 512
; RV32-NEXT: lui t1, 1024
; RV32-NEXT: lui t0, 2048
; RV32-NEXT: lui a7, 4096
; RV32-NEXT: lui a6, 8192
; RV32-NEXT: lui a5, 16384
; RV32-NEXT: lui a4, 32768
; RV32-NEXT: sw a1, 272(sp)
; RV32-NEXT: sw zero, 276(sp)
; RV32-NEXT: sw zero, 264(sp)
; RV32-NEXT: sw s4, 268(sp)
; RV32-NEXT: sw zero, 256(sp)
; RV32-NEXT: sw a3, 260(sp)
; RV32-NEXT: lui a3, 65536
; RV32-NEXT: sw zero, 248(sp)
; RV32-NEXT: sw a2, 252(sp)
; RV32-NEXT: lui a2, 131072
; RV32-NEXT: sw zero, 240(sp)
; RV32-NEXT: sw a0, 244(sp)
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT: vand.vi v28, v12, 2
; RV32-NEXT: vand.vi v4, v12, 1
; RV32-NEXT: vand.vi v24, v12, 4
; RV32-NEXT: vand.vi v20, v12, 8
; RV32-NEXT: sw zero, 232(sp)
; RV32-NEXT: sw s3, 236(sp)
; RV32-NEXT: vand.vx v16, v12, s3
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: addi s3, sp, 272
; RV32-NEXT: sw zero, 224(sp)
; RV32-NEXT: sw s2, 228(sp)
; RV32-NEXT: vand.vx v0, v12, s2
; RV32-NEXT: addi s2, sp, 264
; RV32-NEXT: sw zero, 216(sp)
; RV32-NEXT: sw s5, 220(sp)
; RV32-NEXT: vmul.vv v16, v8, v28
; RV32-NEXT: vmul.vv v28, v8, v4
; RV32-NEXT: vxor.vi v28, v28, 0
; RV32-NEXT: vxor.vv v28, v28, v16
; RV32-NEXT: vand.vx v16, v12, s5
; RV32-NEXT: addi s5, sp, 256
; RV32-NEXT: sw zero, 208(sp)
; RV32-NEXT: sw s6, 212(sp)
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v28, v28, v24
; RV32-NEXT: vand.vx v24, v12, s6
; RV32-NEXT: addi s6, sp, 248
; RV32-NEXT: sw zero, 200(sp)
; RV32-NEXT: sw s8, 204(sp)
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vv v20, v28, v20
; RV32-NEXT: vand.vx v28, v12, s8
; RV32-NEXT: addi s8, sp, 240
; RV32-NEXT: sw zero, 192(sp)
; RV32-NEXT: sw s1, 196(sp)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vxor.vv v20, v20, v4
; RV32-NEXT: vand.vx v4, v12, s1
; RV32-NEXT: sw zero, 184(sp)
; RV32-NEXT: sw s7, 188(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v20, v20, v0
; RV32-NEXT: vand.vx v0, v12, s7
; RV32-NEXT: slli a0, s4, 11
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v20, v20, v16
; RV32-NEXT: vand.vx v16, v12, ra
; RV32-NEXT: sw zero, 176(sp)
; RV32-NEXT: sw a0, 180(sp)
; RV32-NEXT: sw zero, 168(sp)
; RV32-NEXT: sw ra, 172(sp)
; RV32-NEXT: addi s4, sp, 216
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v24, v20, v24
; RV32-NEXT: vand.vx v20, v12, s11
; RV32-NEXT: sw zero, 160(sp)
; RV32-NEXT: sw s11, 164(sp)
; RV32-NEXT: addi s11, sp, 208
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vxor.vv v28, v24, v28
; RV32-NEXT: vand.vx v24, v12, s10
; RV32-NEXT: sw zero, 152(sp)
; RV32-NEXT: sw s10, 156(sp)
; RV32-NEXT: addi s10, sp, 200
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vxor.vv v4, v28, v4
; RV32-NEXT: vand.vx v28, v12, s9
; RV32-NEXT: sw zero, 144(sp)
; RV32-NEXT: sw s9, 148(sp)
; RV32-NEXT: addi s9, sp, 192
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v4, v4, v0
; RV32-NEXT: vand.vx v0, v12, a0
; RV32-NEXT: addi ra, sp, 184
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v0, v4, v0
; RV32-NEXT: vand.vx v4, v12, s0
; RV32-NEXT: sw zero, 136(sp)
; RV32-NEXT: sw s0, 140(sp)
; RV32-NEXT: addi s1, sp, 176
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v0, v0, v16
; RV32-NEXT: vand.vx v16, v12, t6
; RV32-NEXT: sw zero, 128(sp)
; RV32-NEXT: sw t6, 132(sp)
; RV32-NEXT: addi s0, sp, 168
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vv v0, v0, v20
; RV32-NEXT: vand.vx v20, v12, t5
; RV32-NEXT: sw zero, 120(sp)
; RV32-NEXT: sw t5, 124(sp)
; RV32-NEXT: addi t6, sp, 160
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v0, v0, v24
; RV32-NEXT: vand.vx v24, v12, t4
; RV32-NEXT: sw zero, 112(sp)
; RV32-NEXT: sw t4, 116(sp)
; RV32-NEXT: addi t5, sp, 152
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vxor.vv v0, v0, v28
; RV32-NEXT: vand.vx v28, v12, t3
; RV32-NEXT: sw zero, 104(sp)
; RV32-NEXT: sw t3, 108(sp)
; RV32-NEXT: addi t4, sp, 144
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vxor.vv v0, v0, v4
; RV32-NEXT: vand.vx v4, v12, t2
; RV32-NEXT: sw zero, 96(sp)
; RV32-NEXT: sw t2, 100(sp)
; RV32-NEXT: addi t3, sp, 136
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v16, v0, v16
; RV32-NEXT: vand.vx v0, v12, t1
; RV32-NEXT: sw zero, 88(sp)
; RV32-NEXT: sw t1, 92(sp)
; RV32-NEXT: addi t2, sp, 128
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vv v20, v16, v20
; RV32-NEXT: vand.vx v16, v12, t0
; RV32-NEXT: sw zero, 80(sp)
; RV32-NEXT: sw t0, 84(sp)
; RV32-NEXT: addi t1, sp, 120
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v24, v20, v24
; RV32-NEXT: vand.vx v20, v12, a7
; RV32-NEXT: sw zero, 72(sp)
; RV32-NEXT: sw a7, 76(sp)
; RV32-NEXT: addi t0, sp, 112
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vxor.vv v24, v24, v28
; RV32-NEXT: vand.vx v28, v12, a6
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: sw zero, 64(sp)
; RV32-NEXT: sw a6, 68(sp)
; RV32-NEXT: addi a7, sp, 104
; RV32-NEXT: vmul.vv v28, v8, v4
; RV32-NEXT: vxor.vv v24, v24, v28
; RV32-NEXT: vand.vx v28, v12, a5
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: sw zero, 56(sp)
; RV32-NEXT: sw a5, 60(sp)
; RV32-NEXT: addi a6, sp, 96
; RV32-NEXT: vmul.vv v28, v8, v0
; RV32-NEXT: vxor.vv v28, v24, v28
; RV32-NEXT: vand.vx v24, v12, a4
; RV32-NEXT: sw zero, 48(sp)
; RV32-NEXT: sw a4, 52(sp)
; RV32-NEXT: addi a5, sp, 88
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v16, v28, v16
; RV32-NEXT: vand.vx v28, v12, a3
; RV32-NEXT: sw zero, 40(sp)
; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: addi a4, sp, 80
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vv v16, v16, v20
; RV32-NEXT: vand.vx v4, v12, a2
; RV32-NEXT: sw zero, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: addi a3, sp, 72
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: lui a1, 262144
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw zero, 16(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: sw a0, 20(sp)
; RV32-NEXT: addi a2, sp, 64
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v20, v8, v20
; RV32-NEXT: vxor.vv v20, v16, v20
; RV32-NEXT: vlse64.v v16, (s3), zero
; RV32-NEXT: addi s3, sp, 56
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v0, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v0, v20, v0
; RV32-NEXT: vlse64.v v20, (s2), zero
; RV32-NEXT: addi s2, sp, 48
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v0, v0, v24
; RV32-NEXT: vlse64.v v24, (s5), zero
; RV32-NEXT: addi s5, sp, 40
; RV32-NEXT: vmul.vv v28, v8, v28
; RV32-NEXT: vxor.vv v0, v0, v28
; RV32-NEXT: vlse64.v v28, (s6), zero
; RV32-NEXT: addi s6, sp, 32
; RV32-NEXT: vmul.vv v4, v8, v4
; RV32-NEXT: vxor.vv v4, v0, v4
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v4, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v4, (s8), zero
; RV32-NEXT: addi s8, sp, 24
; RV32-NEXT: vand.vv v16, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v20
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v28
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v4
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add s7, s7, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, s7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: addi s7, sp, 232
; RV32-NEXT: vlse64.v v16, (s7), zero
; RV32-NEXT: addi s7, sp, 224
; RV32-NEXT: vlse64.v v20, (s7), zero
; RV32-NEXT: vlse64.v v24, (s4), zero
; RV32-NEXT: vlse64.v v28, (s11), zero
; RV32-NEXT: vand.vv v16, v12, v16
; RV32-NEXT: csrr s4, vlenb
; RV32-NEXT: slli s4, s4, 4
; RV32-NEXT: add s4, sp, s4
; RV32-NEXT: addi s4, s4, 288
; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v20
; RV32-NEXT: csrr s4, vlenb
; RV32-NEXT: slli s4, s4, 2
; RV32-NEXT: mv s7, s4
; RV32-NEXT: slli s4, s4, 1
; RV32-NEXT: add s7, s7, s4
; RV32-NEXT: slli s4, s4, 2
; RV32-NEXT: add s4, s4, s7
; RV32-NEXT: add s4, sp, s4
; RV32-NEXT: addi s4, s4, 288
; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v24
; RV32-NEXT: csrr s4, vlenb
; RV32-NEXT: slli s4, s4, 2
; RV32-NEXT: mv s7, s4
; RV32-NEXT: slli s4, s4, 4
; RV32-NEXT: add s4, s4, s7
; RV32-NEXT: add s4, sp, s4
; RV32-NEXT: addi s4, s4, 288
; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v28
; RV32-NEXT: csrr s4, vlenb
; RV32-NEXT: slli s4, s4, 2
; RV32-NEXT: mv s7, s4
; RV32-NEXT: slli s4, s4, 1
; RV32-NEXT: add s7, s7, s4
; RV32-NEXT: slli s4, s4, 1
; RV32-NEXT: add s7, s7, s4
; RV32-NEXT: slli s4, s4, 2
; RV32-NEXT: add s4, s4, s7
; RV32-NEXT: add s4, sp, s4
; RV32-NEXT: addi s4, s4, 288
; RV32-NEXT: vs4r.v v16, (s4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v20, (s10), zero
; RV32-NEXT: vlse64.v v24, (s9), zero
; RV32-NEXT: vlse64.v v28, (ra), zero
; RV32-NEXT: vlse64.v v4, (s1), zero
; RV32-NEXT: vand.vv v16, v12, v20
; RV32-NEXT: csrr s1, vlenb
; RV32-NEXT: slli s1, s1, 2
; RV32-NEXT: mv s4, s1
; RV32-NEXT: slli s1, s1, 1
; RV32-NEXT: add s1, s1, s4
; RV32-NEXT: add s1, sp, s1
; RV32-NEXT: addi s1, s1, 288
; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v24
; RV32-NEXT: csrr s1, vlenb
; RV32-NEXT: slli s1, s1, 3
; RV32-NEXT: mv s4, s1
; RV32-NEXT: slli s1, s1, 2
; RV32-NEXT: add s1, s1, s4
; RV32-NEXT: add s1, sp, s1
; RV32-NEXT: addi s1, s1, 288
; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v28
; RV32-NEXT: csrr s1, vlenb
; RV32-NEXT: slli s1, s1, 6
; RV32-NEXT: add s1, sp, s1
; RV32-NEXT: addi s1, s1, 288
; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v4
; RV32-NEXT: csrr s1, vlenb
; RV32-NEXT: slli s1, s1, 3
; RV32-NEXT: mv s4, s1
; RV32-NEXT: slli s1, s1, 1
; RV32-NEXT: add s4, s4, s1
; RV32-NEXT: slli s1, s1, 2
; RV32-NEXT: add s1, s1, s4
; RV32-NEXT: add s1, sp, s1
; RV32-NEXT: addi s1, s1, 288
; RV32-NEXT: vs4r.v v16, (s1) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s0), zero
; RV32-NEXT: vlse64.v v28, (t6), zero
; RV32-NEXT: vlse64.v v4, (t5), zero
; RV32-NEXT: vlse64.v v0, (t4), zero
; RV32-NEXT: vand.vv v16, v12, v24
; RV32-NEXT: csrr t4, vlenb
; RV32-NEXT: slli t4, t4, 3
; RV32-NEXT: add t4, sp, t4
; RV32-NEXT: addi t4, t4, 288
; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v28
; RV32-NEXT: csrr t4, vlenb
; RV32-NEXT: slli t4, t4, 2
; RV32-NEXT: mv t5, t4
; RV32-NEXT: slli t4, t4, 3
; RV32-NEXT: add t4, t4, t5
; RV32-NEXT: add t4, sp, t4
; RV32-NEXT: addi t4, t4, 288
; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v4
; RV32-NEXT: csrr t4, vlenb
; RV32-NEXT: slli t4, t4, 2
; RV32-NEXT: mv t5, t4
; RV32-NEXT: slli t4, t4, 1
; RV32-NEXT: add t5, t5, t4
; RV32-NEXT: slli t4, t4, 1
; RV32-NEXT: add t5, t5, t4
; RV32-NEXT: slli t4, t4, 1
; RV32-NEXT: add t4, t4, t5
; RV32-NEXT: add t4, sp, t4
; RV32-NEXT: addi t4, t4, 288
; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v0
; RV32-NEXT: csrr t4, vlenb
; RV32-NEXT: slli t4, t4, 2
; RV32-NEXT: mv t5, t4
; RV32-NEXT: slli t4, t4, 2
; RV32-NEXT: add t5, t5, t4
; RV32-NEXT: slli t4, t4, 2
; RV32-NEXT: add t4, t4, t5
; RV32-NEXT: add t4, sp, t4
; RV32-NEXT: addi t4, t4, 288
; RV32-NEXT: vs4r.v v16, (t4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v28, (t3), zero
; RV32-NEXT: vlse64.v v4, (t2), zero
; RV32-NEXT: vlse64.v v0, (t1), zero
; RV32-NEXT: vlse64.v v16, (t0), zero
; RV32-NEXT: vand.vv v20, v12, v28
; RV32-NEXT: csrr t0, vlenb
; RV32-NEXT: slli t0, t0, 2
; RV32-NEXT: add t0, sp, t0
; RV32-NEXT: addi t0, t0, 288
; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v20, v12, v4
; RV32-NEXT: csrr t0, vlenb
; RV32-NEXT: slli t0, t0, 5
; RV32-NEXT: add t0, sp, t0
; RV32-NEXT: addi t0, t0, 288
; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v20, v12, v0
; RV32-NEXT: csrr t0, vlenb
; RV32-NEXT: slli t0, t0, 3
; RV32-NEXT: mv t1, t0
; RV32-NEXT: slli t0, t0, 1
; RV32-NEXT: add t1, t1, t0
; RV32-NEXT: slli t0, t0, 1
; RV32-NEXT: add t0, t0, t1
; RV32-NEXT: add t0, sp, t0
; RV32-NEXT: addi t0, t0, 288
; RV32-NEXT: vs4r.v v20, (t0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v16
; RV32-NEXT: csrr t0, vlenb
; RV32-NEXT: slli t0, t0, 4
; RV32-NEXT: mv t1, t0
; RV32-NEXT: slli t0, t0, 2
; RV32-NEXT: add t0, t0, t1
; RV32-NEXT: add t0, sp, t0
; RV32-NEXT: addi t0, t0, 288
; RV32-NEXT: vs4r.v v16, (t0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v16, (a7), zero
; RV32-NEXT: vlse64.v v0, (a6), zero
; RV32-NEXT: vlse64.v v20, (a5), zero
; RV32-NEXT: vlse64.v v24, (a4), zero
; RV32-NEXT: vand.vv v4, v12, v16
; RV32-NEXT: vand.vv v16, v12, v0
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 2
; RV32-NEXT: mv a5, a4
; RV32-NEXT: slli a4, a4, 1
; RV32-NEXT: add a5, a5, a4
; RV32-NEXT: slli a4, a4, 1
; RV32-NEXT: add a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 288
; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v20
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 2
; RV32-NEXT: mv a5, a4
; RV32-NEXT: slli a4, a4, 2
; RV32-NEXT: add a5, a5, a4
; RV32-NEXT: slli a4, a4, 1
; RV32-NEXT: add a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 288
; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v24
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 2
; RV32-NEXT: mv a5, a4
; RV32-NEXT: slli a4, a4, 1
; RV32-NEXT: add a5, a5, a4
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 288
; RV32-NEXT: vs4r.v v16, (a4) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v16, (a3), zero
; RV32-NEXT: vlse64.v v20, (a2), zero
; RV32-NEXT: vlse64.v v24, (s3), zero
; RV32-NEXT: vlse64.v v28, (s2), zero
; RV32-NEXT: vand.vv v0, v12, v16
; RV32-NEXT: vand.vv v16, v12, v20
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: slli a2, a2, 1
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 288
; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v24
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: mv a3, a2
; RV32-NEXT: slli a2, a2, 1
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 288
; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v28
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 288
; RV32-NEXT: vs4r.v v16, (a2) # vscale x 32-byte Folded Spill
; RV32-NEXT: vlse64.v v16, (s5), zero
; RV32-NEXT: vlse64.v v20, (s6), zero
; RV32-NEXT: vlse64.v v24, (s8), zero
; RV32-NEXT: vlse64.v v28, (a0), zero
; RV32-NEXT: vand.vv v16, v12, v16
; RV32-NEXT: addi a0, sp, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v16, v12, v20
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a2, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
; RV32-NEXT: vand.vv v24, v12, v24
; RV32-NEXT: vand.vv v20, v12, v28
; RV32-NEXT: vand.vx v12, v12, a1
; RV32-NEXT: vmul.vv v12, v8, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vxor.vv v12, v16, v12
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 6
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: vmul.vv v16, v8, v4
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: vmul.vv v16, v8, v0
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: addi a0, sp, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: vmul.vv v16, v8, v24
; RV32-NEXT: vxor.vv v12, v12, v16
; RV32-NEXT: vmul.vv v8, v8, v20
; RV32-NEXT: vxor.vv v8, v12, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 352
; RV32-NEXT: ret
;
; RV64-LABEL: clmul_nxv4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vand.vi v16, v12, 2
; RV64-NEXT: vand.vi v20, v12, 1
; RV64-NEXT: vmul.vv v16, v8, v16
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v20, v16
; RV64-NEXT: vand.vi v20, v12, 4
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vi v20, v12, 8
; RV64-NEXT: li a0, 16
; RV64-NEXT: li a1, 32
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a0
; RV64-NEXT: li a0, 64
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: li a1, 128
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a0
; RV64-NEXT: li a0, 256
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: li a1, 512
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a0
; RV64-NEXT: li a2, 1024
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: li a0, 1
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a2
; RV64-NEXT: slli a1, a0, 11
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 2
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 4
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 8
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 16
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 32
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 64
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 128
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 256
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 512
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 1024
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 2048
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 4096
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 8192
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 16384
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 32768
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 65536
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 131072
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: lui a1, 262144
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 31
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 33
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 34
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 35
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 36
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 37
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 38
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 39
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 40
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 41
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 42
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 43
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 44
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 45
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 46
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 47
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 48
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 49
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 50
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 52
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 53
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 54
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 55
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 56
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 57
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 58
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 60
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: slli a0, a0, 62
; RV64-NEXT: slli a1, a1, 63
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vand.vx v20, v12, a0
; RV64-NEXT: vand.vx v12, v12, a1
; RV64-NEXT: vmul.vv v20, v8, v20
; RV64-NEXT: vxor.vv v16, v16, v20
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vxor.vv v8, v16, v8
; RV64-NEXT: ret
%a = call <vscale x 4 x i64> @llvm.clmul.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %y)
ret <vscale x 4 x i64> %a
}
define <vscale x 8 x i64> @clmul_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) nounwind {
; RV32-LABEL: clmul_nxv8i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -352
; RV32-NEXT: sw ra, 348(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 344(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 340(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 336(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 332(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 328(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 324(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 320(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 316(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 312(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 308(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 304(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 300(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: li s5, 1
; RV32-NEXT: li a3, 2
; RV32-NEXT: li a2, 4
; RV32-NEXT: li s10, 8
; RV32-NEXT: li a0, 16
; RV32-NEXT: li t6, 32
; RV32-NEXT: li s1, 64
; RV32-NEXT: li s3, 128
; RV32-NEXT: li s7, 256
; RV32-NEXT: li s4, 512
; RV32-NEXT: li s8, 1024
; RV32-NEXT: lui ra, 1
; RV32-NEXT: lui s11, 2
; RV32-NEXT: lui s9, 4
; RV32-NEXT: lui s6, 8
; RV32-NEXT: lui s2, 16
; RV32-NEXT: lui s0, 32
; RV32-NEXT: lui t5, 64
; RV32-NEXT: lui t4, 128
; RV32-NEXT: lui t3, 256
; RV32-NEXT: lui t2, 512
; RV32-NEXT: lui t1, 1024
; RV32-NEXT: lui t0, 2048
; RV32-NEXT: lui a7, 4096
; RV32-NEXT: lui a6, 8192
; RV32-NEXT: lui a5, 16384
; RV32-NEXT: lui a4, 32768
; RV32-NEXT: sw a1, 272(sp)
; RV32-NEXT: sw zero, 276(sp)
; RV32-NEXT: sw zero, 264(sp)
; RV32-NEXT: sw s5, 268(sp)
; RV32-NEXT: sw zero, 256(sp)
; RV32-NEXT: sw a3, 260(sp)
; RV32-NEXT: lui a3, 65536
; RV32-NEXT: sw zero, 248(sp)
; RV32-NEXT: sw a2, 252(sp)
; RV32-NEXT: lui a2, 131072
; RV32-NEXT: sw zero, 240(sp)
; RV32-NEXT: sw s10, 244(sp)
; RV32-NEXT: vsetvli s10, zero, e64, m8, ta, ma
; RV32-NEXT: vand.vi v24, v16, 2
; RV32-NEXT: vand.vi v0, v16, 1
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v0, v24
; RV32-NEXT: vand.vi v0, v16, 4
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vi v0, v16, 8
; RV32-NEXT: sw zero, 232(sp)
; RV32-NEXT: sw a0, 236(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a0
; RV32-NEXT: addi s10, sp, 272
; RV32-NEXT: sw zero, 224(sp)
; RV32-NEXT: sw t6, 228(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t6
; RV32-NEXT: sw zero, 216(sp)
; RV32-NEXT: sw s1, 220(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s1
; RV32-NEXT: sw zero, 208(sp)
; RV32-NEXT: sw s3, 212(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s3
; RV32-NEXT: sw zero, 200(sp)
; RV32-NEXT: sw s7, 204(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s7
; RV32-NEXT: sw zero, 192(sp)
; RV32-NEXT: sw s4, 196(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s4
; RV32-NEXT: sw zero, 184(sp)
; RV32-NEXT: sw s8, 188(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s8
; RV32-NEXT: slli s5, s5, 11
; RV32-NEXT: sw zero, 176(sp)
; RV32-NEXT: sw s5, 180(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s5
; RV32-NEXT: addi s5, sp, 216
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, ra
; RV32-NEXT: sw zero, 168(sp)
; RV32-NEXT: sw ra, 172(sp)
; RV32-NEXT: addi ra, sp, 208
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s11
; RV32-NEXT: sw zero, 160(sp)
; RV32-NEXT: sw s11, 164(sp)
; RV32-NEXT: addi s11, sp, 200
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s9
; RV32-NEXT: sw zero, 152(sp)
; RV32-NEXT: sw s9, 156(sp)
; RV32-NEXT: addi s9, sp, 192
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s6
; RV32-NEXT: sw zero, 144(sp)
; RV32-NEXT: sw s6, 148(sp)
; RV32-NEXT: addi s6, sp, 184
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s2
; RV32-NEXT: sw zero, 136(sp)
; RV32-NEXT: sw s2, 140(sp)
; RV32-NEXT: addi s3, sp, 176
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, s0
; RV32-NEXT: sw zero, 128(sp)
; RV32-NEXT: sw s0, 132(sp)
; RV32-NEXT: addi s4, sp, 168
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t5
; RV32-NEXT: sw zero, 120(sp)
; RV32-NEXT: sw t5, 124(sp)
; RV32-NEXT: addi s2, sp, 160
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t4
; RV32-NEXT: sw zero, 112(sp)
; RV32-NEXT: sw t4, 116(sp)
; RV32-NEXT: addi s1, sp, 152
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t3
; RV32-NEXT: sw zero, 104(sp)
; RV32-NEXT: sw t3, 108(sp)
; RV32-NEXT: addi t6, sp, 144
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t2
; RV32-NEXT: sw zero, 96(sp)
; RV32-NEXT: sw t2, 100(sp)
; RV32-NEXT: addi s0, sp, 136
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t1
; RV32-NEXT: sw zero, 88(sp)
; RV32-NEXT: sw t1, 92(sp)
; RV32-NEXT: addi t5, sp, 128
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, t0
; RV32-NEXT: sw zero, 80(sp)
; RV32-NEXT: sw t0, 84(sp)
; RV32-NEXT: addi t4, sp, 120
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a7
; RV32-NEXT: sw zero, 72(sp)
; RV32-NEXT: sw a7, 76(sp)
; RV32-NEXT: addi t2, sp, 112
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a6
; RV32-NEXT: sw zero, 64(sp)
; RV32-NEXT: sw a6, 68(sp)
; RV32-NEXT: addi t3, sp, 104
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a5
; RV32-NEXT: sw zero, 56(sp)
; RV32-NEXT: sw a5, 60(sp)
; RV32-NEXT: addi t1, sp, 96
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a4
; RV32-NEXT: sw zero, 48(sp)
; RV32-NEXT: sw a4, 52(sp)
; RV32-NEXT: addi t0, sp, 88
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a3
; RV32-NEXT: sw zero, 40(sp)
; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: addi a7, sp, 80
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: vand.vx v0, v16, a2
; RV32-NEXT: sw zero, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: sw zero, 24(sp)
; RV32-NEXT: lui a0, 262144
; RV32-NEXT: sw a0, 28(sp)
; RV32-NEXT: sw zero, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: vmul.vv v0, v8, v0
; RV32-NEXT: vxor.vv v24, v24, v0
; RV32-NEXT: sw t2, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s10), zero
; RV32-NEXT: addi a6, sp, 72
; RV32-NEXT: addi a5, sp, 64
; RV32-NEXT: addi a4, sp, 56
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: addi s10, sp, 40
; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi s7, sp, 264
; RV32-NEXT: vlse64.v v24, (s7), zero
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: addi s7, sp, 256
; RV32-NEXT: vlse64.v v0, (s7), zero
; RV32-NEXT: addi s7, sp, 248
; RV32-NEXT: vlse64.v v24, (s7), zero
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: addi s7, sp, 240
; RV32-NEXT: vlse64.v v24, (s7), zero
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 5
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add s7, s7, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, s7
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: addi s7, sp, 16
; RV32-NEXT: addi s8, sp, 232
; RV32-NEXT: vlse64.v v24, (s8), zero
; RV32-NEXT: csrr s8, vlenb
; RV32-NEXT: slli s8, s8, 4
; RV32-NEXT: mv t2, s8
; RV32-NEXT: slli s8, s8, 2
; RV32-NEXT: add t2, t2, s8
; RV32-NEXT: slli s8, s8, 1
; RV32-NEXT: add s8, s8, t2
; RV32-NEXT: lw t2, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: add s8, sp, s8
; RV32-NEXT: addi s8, s8, 288
; RV32-NEXT: vs8r.v v24, (s8) # vscale x 64-byte Folded Spill
; RV32-NEXT: addi s8, sp, 224
; RV32-NEXT: vlse64.v v0, (s8), zero
; RV32-NEXT: vlse64.v v24, (s5), zero
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (ra), zero
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 4
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 4
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vl8r.v v24, (s5) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v0, (s11), zero
; RV32-NEXT: vlse64.v v24, (s9), zero
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: mv s8, s5
; RV32-NEXT: slli s5, s5, 3
; RV32-NEXT: add s8, s8, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s8
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s6), zero
; RV32-NEXT: csrr s5, vlenb
; RV32-NEXT: slli s5, s5, 4
; RV32-NEXT: mv s6, s5
; RV32-NEXT: slli s5, s5, 2
; RV32-NEXT: add s6, s6, s5
; RV32-NEXT: slli s5, s5, 1
; RV32-NEXT: add s5, s5, s6
; RV32-NEXT: add s5, sp, s5
; RV32-NEXT: addi s5, s5, 288
; RV32-NEXT: vs8r.v v24, (s5) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s3), zero
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 6
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v0, v16, v0
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 4
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vs8r.v v0, (s3) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 3
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 3
; RV32-NEXT: add s5, s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 3
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 2
; RV32-NEXT: add s5, s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 4
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 2
; RV32-NEXT: add s5, s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 5
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 2
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 6
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vl8r.v v24, (s3) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 4
; RV32-NEXT: mv s5, s3
; RV32-NEXT: slli s3, s3, 2
; RV32-NEXT: add s5, s5, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s5
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s4), zero
; RV32-NEXT: csrr s3, vlenb
; RV32-NEXT: slli s3, s3, 6
; RV32-NEXT: mv s4, s3
; RV32-NEXT: slli s3, s3, 1
; RV32-NEXT: add s3, s3, s4
; RV32-NEXT: add s3, sp, s3
; RV32-NEXT: addi s3, s3, 288
; RV32-NEXT: vs8r.v v24, (s3) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v0, (s2), zero
; RV32-NEXT: vlse64.v v24, (s1), zero
; RV32-NEXT: csrr s1, vlenb
; RV32-NEXT: slli s1, s1, 3
; RV32-NEXT: mv s2, s1
; RV32-NEXT: slli s1, s1, 3
; RV32-NEXT: add s2, s2, s1
; RV32-NEXT: slli s1, s1, 1
; RV32-NEXT: add s1, s1, s2
; RV32-NEXT: add s1, sp, s1
; RV32-NEXT: addi s1, s1, 288
; RV32-NEXT: vs8r.v v24, (s1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (t6), zero
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 2
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 6
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 2
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 5
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 2
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vl8r.v v24, (t6) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s1, t6
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: add s1, s1, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add t6, t6, s1
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s0), zero
; RV32-NEXT: csrr t6, vlenb
; RV32-NEXT: slli t6, t6, 3
; RV32-NEXT: mv s0, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s0, s0, t6
; RV32-NEXT: slli t6, t6, 1
; RV32-NEXT: add s0, s0, t6
; RV32-NEXT: slli t6, t6, 2
; RV32-NEXT: add t6, t6, s0
; RV32-NEXT: add t6, sp, t6
; RV32-NEXT: addi t6, t6, 288
; RV32-NEXT: vs8r.v v24, (t6) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v0, (t5), zero
; RV32-NEXT: vlse64.v v24, (t4), zero
; RV32-NEXT: csrr t4, vlenb
; RV32-NEXT: slli t4, t4, 6
; RV32-NEXT: mv t5, t4
; RV32-NEXT: slli t4, t4, 1
; RV32-NEXT: add t4, t4, t5
; RV32-NEXT: add t4, sp, t4
; RV32-NEXT: addi t4, t4, 288
; RV32-NEXT: vs8r.v v24, (t4) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (t2), zero
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t4, t4, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t4, t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t4, t4, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 5
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t4, t4, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 6
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 3
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t4, t4, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vl8r.v v24, (t2) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 6
; RV32-NEXT: mv t4, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t2, t2, t4
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (t3), zero
; RV32-NEXT: csrr t2, vlenb
; RV32-NEXT: slli t2, t2, 4
; RV32-NEXT: mv t3, t2
; RV32-NEXT: slli t2, t2, 1
; RV32-NEXT: add t3, t3, t2
; RV32-NEXT: slli t2, t2, 2
; RV32-NEXT: add t2, t2, t3
; RV32-NEXT: add t2, sp, t2
; RV32-NEXT: addi t2, t2, 288
; RV32-NEXT: vs8r.v v24, (t2) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v0, (t1), zero
; RV32-NEXT: vlse64.v v24, (t0), zero
; RV32-NEXT: csrr t0, vlenb
; RV32-NEXT: slli t0, t0, 3
; RV32-NEXT: mv t1, t0
; RV32-NEXT: slli t0, t0, 1
; RV32-NEXT: add t1, t1, t0
; RV32-NEXT: slli t0, t0, 1
; RV32-NEXT: add t1, t1, t0
; RV32-NEXT: slli t0, t0, 2
; RV32-NEXT: add t0, t0, t1
; RV32-NEXT: add t0, sp, t0
; RV32-NEXT: addi t0, t0, 288
; RV32-NEXT: vs8r.v v24, (t0) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (a7), zero
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 7
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 4
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add t0, t0, a7
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 3
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 4
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 3
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add t0, t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add t0, t0, a7
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 3
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 4
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 7
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vl8r.v v24, (a7) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a7, vlenb
; RV32-NEXT: slli a7, a7, 3
; RV32-NEXT: mv t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add t0, t0, a7
; RV32-NEXT: slli a7, a7, 1
; RV32-NEXT: add t0, t0, a7
; RV32-NEXT: slli a7, a7, 2
; RV32-NEXT: add a7, a7, t0
; RV32-NEXT: add a7, sp, a7
; RV32-NEXT: addi a7, a7, 288
; RV32-NEXT: vs8r.v v24, (a7) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (a6), zero
; RV32-NEXT: csrr a6, vlenb
; RV32-NEXT: slli a6, a6, 7
; RV32-NEXT: add a6, sp, a6
; RV32-NEXT: addi a6, a6, 288
; RV32-NEXT: vs8r.v v24, (a6) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v0, (a5), zero
; RV32-NEXT: vlse64.v v24, (a4), zero
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: mv a5, a4
; RV32-NEXT: slli a4, a4, 1
; RV32-NEXT: add a5, a5, a4
; RV32-NEXT: slli a4, a4, 2
; RV32-NEXT: add a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 288
; RV32-NEXT: vs8r.v v24, (a4) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a4, a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a4, a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 7
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a4, a1
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: mv a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a4, a4, a1
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 7
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a4, a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a4, a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: mv a4, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a4, a4, a1
; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s10), zero
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v0, (a3), zero
; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vlse64.v v24, (s7), zero
; RV32-NEXT: addi a1, sp, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 6
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v24, v16, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: mv a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a2, a2, a1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 288
; RV32-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
; RV32-NEXT: addi a1, sp, 288
; RV32-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vand.vv v0, v16, v24
; RV32-NEXT: vand.vx v16, v16, a0
; RV32-NEXT: vmul.vv v16, v8, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vxor.vv v16, v24, v16
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 8
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 6
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 7
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 6
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 288
; RV32-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: vxor.vv v16, v16, v24
; RV32-NEXT: vmul.vv v8, v8, v0
; RV32-NEXT: vxor.vv v8, v16, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: mv a1, a0
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: lw ra, 348(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 344(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 340(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 336(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 332(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 328(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 324(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 320(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 316(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 312(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 308(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 304(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 300(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 352
; RV32-NEXT: ret
;
; RV64-LABEL: clmul_nxv8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vand.vi v24, v16, 2
; RV64-NEXT: vand.vi v0, v16, 1
; RV64-NEXT: vmul.vv v24, v8, v24
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v0, v24
; RV64-NEXT: vand.vi v0, v16, 4
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vi v0, v16, 8
; RV64-NEXT: li a0, 16
; RV64-NEXT: li a1, 32
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a0
; RV64-NEXT: li a0, 64
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: li a1, 128
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a0
; RV64-NEXT: li a0, 256
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: li a1, 512
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a0
; RV64-NEXT: li a2, 1024
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: li a0, 1
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a2
; RV64-NEXT: slli a1, a0, 11
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 2
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 4
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 8
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 16
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 32
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 64
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 128
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 256
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 512
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 1024
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 2048
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 4096
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 8192
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 16384
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 32768
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 65536
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 131072
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: lui a1, 262144
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 31
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 32
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 33
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 34
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 35
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 36
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 37
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 38
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 39
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 40
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 41
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 42
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 43
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 44
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 45
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 46
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 47
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 48
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 49
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 50
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 52
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 53
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 54
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 55
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 56
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 57
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 58
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 60
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a1
; RV64-NEXT: li a1, -1
; RV64-NEXT: slli a0, a0, 62
; RV64-NEXT: slli a1, a1, 63
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vand.vx v0, v16, a0
; RV64-NEXT: vand.vx v16, v16, a1
; RV64-NEXT: vmul.vv v0, v8, v0
; RV64-NEXT: vxor.vv v24, v24, v0
; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vxor.vv v8, v24, v8
; RV64-NEXT: ret
%a = call <vscale x 8 x i64> @llvm.clmul.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
ret <vscale x 8 x i64> %a
}
; Carry-less multiply of two <vscale x 4 x i8> vectors through llvm.clmul.
; The expected output is shared by the riscv32 and riscv64 runs. At e8/mf2,
; each of the eight single-bit masks 1 through 128 is ANDed out of v9, the
; masked value is multiplied with v8, and the partial products are
; XOR-accumulated (presumably %a arrives in v8 and %b in v9; verify against
; the vector calling convention).
define <vscale x 4 x i8> @clmul_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) nounwind {
; CHECK-LABEL: clmul_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vand.vi v10, v9, 2
; CHECK-NEXT: vand.vi v11, v9, 1
; CHECK-NEXT: vmul.vv v10, v8, v10
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v11, v10
; CHECK-NEXT: vand.vi v11, v9, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vi v11, v9, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vxor.vv v8, v10, v8
; CHECK-NEXT: ret
%res = call <vscale x 4 x i8> @llvm.clmul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b)
ret <vscale x 4 x i8> %res
}
; Carry-less multiply of two <vscale x 4 x i16> vectors through llvm.clmul.
; The expected output is shared by the riscv32 and riscv64 runs. At e16/m1,
; sixteen single-bit masks are applied to v9, each masked value is multiplied
; with v8, and the partial products are XOR-accumulated. Masks 1 through 8 are
; vand.vi immediates, 16 through 1024 are loaded with li, bit 11 is built with
; li + slli, and the four highest bits are loaded with lui 1, 2, 4 and 8.
define <vscale x 4 x i16> @clmul_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) nounwind {
; CHECK-LABEL: clmul_nxv4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vand.vi v10, v9, 2
; CHECK-NEXT: vand.vi v11, v9, 1
; CHECK-NEXT: vmul.vv v10, v8, v10
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v11, v10
; CHECK-NEXT: vand.vi v11, v9, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vi v11, v9, 8
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 256
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 1024
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 1
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 4
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vand.vx v11, v9, a0
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: vmul.vv v11, v8, v11
; CHECK-NEXT: vxor.vv v10, v10, v11
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vxor.vv v8, v10, v8
; CHECK-NEXT: ret
%res = call <vscale x 4 x i16> @llvm.clmul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b)
ret <vscale x 4 x i16> %res
}
; Widened carry-less multiply pattern on <vscale x 4 x i8>: both operands are
; zero-extended to i16, multiplied with llvm.clmul, and the 16-bit product is
; shifted right by 7 and truncated back to i8.
; llvm.clmul is overloaded on its vector type, so the call carries the nxv4i16
; suffix that matches the widened operands.
; In the expected output (shared by the riscv32 and riscv64 runs) the
; zero-extends become vzext.vf2 at e16/m1, only the eight masks 1 through 128
; are applied to the extended second operand, and the final shift + truncate
; is a single vnsrl.wi by 7 at e8/mf2.
define <vscale x 4 x i8> @clmulr_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) nounwind {
; CHECK-LABEL: clmulr_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vzext.vf2 v8, v9
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vand.vi v9, v8, 2
; CHECK-NEXT: vand.vi v11, v8, 1
; CHECK-NEXT: vmul.vv v9, v10, v9
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v11, v9
; CHECK-NEXT: vand.vi v11, v8, 4
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vi v11, v8, 8
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vx v11, v8, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vx v11, v8, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vx v11, v8, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vmul.vv v8, v10, v8
; CHECK-NEXT: vxor.vv v8, v9, v8
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 7
; CHECK-NEXT: ret
%a.ext = zext <vscale x 4 x i8> %a to <vscale x 4 x i16>
%b.ext = zext <vscale x 4 x i8> %b to <vscale x 4 x i16>
%clmul = call <vscale x 4 x i16> @llvm.clmul.nxv4i16(<vscale x 4 x i16> %a.ext, <vscale x 4 x i16> %b.ext)
%res.ext = lshr <vscale x 4 x i16> %clmul, splat(i16 7)
%res = trunc <vscale x 4 x i16> %res.ext to <vscale x 4 x i8>
ret <vscale x 4 x i8> %res
}
; Widened carry-less multiply pattern on <vscale x 4 x i8> that keeps the high
; half: both operands are zero-extended to i16, multiplied with llvm.clmul,
; and the 16-bit product is shifted right by 8 and truncated back to i8.
; llvm.clmul is overloaded on its vector type, so the call carries the nxv4i16
; suffix that matches the widened operands.
; In the expected output (shared by the riscv32 and riscv64 runs) the
; zero-extends become vzext.vf2 at e16/m1, only the eight masks 1 through 128
; are applied to the extended second operand, and the final shift + truncate
; is a single vnsrl.wi by 8 at e8/mf2.
define <vscale x 4 x i8> @clmulh_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) nounwind {
; CHECK-LABEL: clmulh_nxv4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vzext.vf2 v10, v8
; CHECK-NEXT: vzext.vf2 v8, v9
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: vand.vi v9, v8, 2
; CHECK-NEXT: vand.vi v11, v8, 1
; CHECK-NEXT: vmul.vv v9, v10, v9
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v11, v9
; CHECK-NEXT: vand.vi v11, v8, 4
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vi v11, v8, 8
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vx v11, v8, a0
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vx v11, v8, a0
; CHECK-NEXT: li a0, 64
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vand.vx v11, v8, a0
; CHECK-NEXT: li a0, 128
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vmul.vv v11, v10, v11
; CHECK-NEXT: vxor.vv v9, v9, v11
; CHECK-NEXT: vmul.vv v8, v10, v8
; CHECK-NEXT: vxor.vv v8, v9, v8
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v8, 8
; CHECK-NEXT: ret
%a.ext = zext <vscale x 4 x i8> %a to <vscale x 4 x i16>
%b.ext = zext <vscale x 4 x i8> %b to <vscale x 4 x i16>
%clmul = call <vscale x 4 x i16> @llvm.clmul.nxv4i16(<vscale x 4 x i16> %a.ext, <vscale x 4 x i16> %b.ext)
%res.ext = lshr <vscale x 4 x i16> %clmul, splat(i16 8)
%res = trunc <vscale x 4 x i16> %res.ext to <vscale x 4 x i8>
ret <vscale x 4 x i8> %res
}