[RISCV] Spilling for RISC-V V extension. (2nd version)

Add support for spilling and reloading vector registers of the RISC-V V
extension. The new tests cover spilling whole vector registers and
register groups (from fractional LMUL up to LMUL=8) around an inline-asm
clobber at -O0 and -O2 (rv32/rv64-spill-vector.ll), keeping a vector value
and its VL operand live across a call (rv32/rv64-spill-vector-csr.ll), and
spilling scalar f16/f32/f64 values in functions that also use vector
intrinsics (spill-fpr-scalar.ll). The spill area is sized at run time by
reading vlenb and scaling it by the register-group size, and the spills
and reloads use the whole-register vs{1,2,4,8}r.v and vl{1,2,4,8}r*.v
instructions.
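As a quick illustration (not part of the patch itself), the stack-adjustment
pattern every test expects is the same at each LMUL: read vlenb, shift it
left by log2(LMUL), and move sp by that amount, with the whole-register
store/load instructions doing the actual transfer. A minimal sketch of that
sequence, taken from the LMUL=4 case in rv32-spill-vector.ll below:

    csrr     a0, vlenb        # a0 = VLEN/8, size in bytes of one vector register
    slli     a0, a0, 2        # scale by LMUL=4 (shift left by log2(LMUL))
    sub      sp, sp, a0       # reserve 4*vlenb bytes of stack for the spill slot
    vs4r.v   v8, (sp)         # whole-register spill of the v8-v11 group
    # ... code that clobbers v8-v11 runs here ...
    vl4re8.v v8, (sp)         # whole-register reload
    csrr     a0, vlenb        # recompute the size; it is not a compile-time constant
    slli     a0, a0, 2
    add      sp, sp, a0       # release the spill slot
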
Differential Revision: https://reviews.llvm.org/D95148
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
new file mode 100644
index 0000000..3588818
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -O0 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+
+define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double> %c, i32 %gvl) nounwind
+; SPILL-O0-LABEL: foo:
+; SPILL-O0:       # %bb.0:
+; SPILL-O0-NEXT:    addi sp, sp, -16
+; SPILL-O0-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    slli a1, a1, 1
+; SPILL-O0-NEXT:    sub sp, sp, a1
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    slli a1, a1, 1
+; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    sw a0, 8(a1) # 4-byte Folded Spill
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; SPILL-O0-NEXT:    vfadd.vv v25, v8, v9
+; SPILL-O0-NEXT:    vs1r.v v25, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O0-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O0-NEXT:    call puts@plt
+; SPILL-O0-NEXT:    vl1r.v v25, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    # kill: def $x11 killed $x10
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    add a0, sp, a0
+; SPILL-O0-NEXT:    lw a0, 8(a0) # 4-byte Folded Reload
+; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; SPILL-O0-NEXT:    vfadd.vv v8, v8, v25
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; SPILL-O0-NEXT:    addi sp, sp, 16
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: foo:
+; SPILL-O2:       # %bb.0:
+; SPILL-O2-NEXT:    addi sp, sp, -16
+; SPILL-O2-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; SPILL-O2-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; SPILL-O2-NEXT:    csrr a1, vlenb
+; SPILL-O2-NEXT:    slli a1, a1, 1
+; SPILL-O2-NEXT:    sub sp, sp, a1
+; SPILL-O2-NEXT:    mv s0, a0
+; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; SPILL-O2-NEXT:    vfadd.vv v25, v8, v9
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O2-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O2-NEXT:    call puts@plt
+; SPILL-O2-NEXT:    vsetvli a0, s0, e64,m1,ta,mu
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    vl1r.v v26, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    vfadd.vv v8, v26, v25
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; SPILL-O2-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+{
+   %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i32 %gvl)
+   %call = call signext i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+   %z = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %x, i32 %gvl)
+   ret <vscale x 1 x double> %z
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i32 %gvl)
+declare i32 @puts(i8*);
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
new file mode 100644
index 0000000..3058979
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -O0 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i32> @spill_lmul_mf2(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_mf2:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_mf2:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 1 x i32> %va
+}
+
+define <vscale x 2 x i32> @spill_lmul_1(<vscale x 2 x i32> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_1:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_1:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 2 x i32> %va
+}
+
+define <vscale x 4 x i32> @spill_lmul_2(<vscale x 4 x i32> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_2:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_2:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 4 x i32> %va
+}
+
+define <vscale x 8 x i32> @spill_lmul_4(<vscale x 8 x i32> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_4:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 2
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 2
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_4:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 8 x i32> %va
+}
+
+define <vscale x 16 x i32> @spill_lmul_8(<vscale x 16 x i32> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_8:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 3
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 3
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_8:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 16 x i32> %va
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
new file mode 100644
index 0000000..7a55a29
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -O0 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+
+define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double> %c, i64 %gvl) nounwind
+; SPILL-O0-LABEL: foo:
+; SPILL-O0:       # %bb.0:
+; SPILL-O0-NEXT:    addi sp, sp, -16
+; SPILL-O0-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    slli a1, a1, 1
+; SPILL-O0-NEXT:    sub sp, sp, a1
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    slli a1, a1, 1
+; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    sd a0, 0(a1) # 8-byte Folded Spill
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; SPILL-O0-NEXT:    vfadd.vv v25, v8, v9
+; SPILL-O0-NEXT:    vs1r.v v25, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O0-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O0-NEXT:    call puts@plt
+; SPILL-O0-NEXT:    vl1r.v v25, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a1, vlenb
+; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    # kill: def $x11 killed $x10
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    add a0, sp, a0
+; SPILL-O0-NEXT:    ld a0, 0(a0) # 8-byte Folded Reload
+; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; SPILL-O0-NEXT:    vfadd.vv v8, v8, v25
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SPILL-O0-NEXT:    addi sp, sp, 16
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: foo:
+; SPILL-O2:       # %bb.0:
+; SPILL-O2-NEXT:    addi sp, sp, -16
+; SPILL-O2-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SPILL-O2-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; SPILL-O2-NEXT:    csrr a1, vlenb
+; SPILL-O2-NEXT:    slli a1, a1, 1
+; SPILL-O2-NEXT:    sub sp, sp, a1
+; SPILL-O2-NEXT:    mv s0, a0
+; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; SPILL-O2-NEXT:    vfadd.vv v25, v8, v9
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O2-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O2-NEXT:    call puts@plt
+; SPILL-O2-NEXT:    vsetvli a0, s0, e64,m1,ta,mu
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    vl1r.v v26, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    vfadd.vv v8, v26, v25
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; SPILL-O2-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+{
+   %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
+   %call = call signext i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
+   %z = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %x, i64 %gvl)
+   ret <vscale x 1 x double> %z
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
+declare i32 @puts(i8*);
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
new file mode 100644
index 0000000..260cf31
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O0 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i64> @spill_lmul_1(<vscale x 1 x i64> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_1:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_1:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 1 x i64> %va
+}
+
+define <vscale x 2 x i64> @spill_lmul_2(<vscale x 2 x i64> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_2:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 1
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_2:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 2 x i64> %va
+}
+
+define <vscale x 4 x i64> @spill_lmul_4(<vscale x 4 x i64> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_4:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 2
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 2
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_4:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 2
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 4 x i64> %va
+}
+
+define <vscale x 8 x i64> @spill_lmul_8(<vscale x 8 x i64> %va) nounwind {
+; SPILL-O0-LABEL: spill_lmul_8:
+; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 3
+; SPILL-O0-NEXT:    sub sp, sp, a0
+; SPILL-O0-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    #APP
+; SPILL-O0-NEXT:    #NO_APP
+; SPILL-O0-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    csrr a0, vlenb
+; SPILL-O0-NEXT:    slli a0, a0, 3
+; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    ret
+;
+; SPILL-O2-LABEL: spill_lmul_8:
+; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    sub sp, sp, a0
+; SPILL-O2-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    #APP
+; SPILL-O2-NEXT:    #NO_APP
+; SPILL-O2-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    slli a0, a0, 3
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    ret
+entry:
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+  ret <vscale x 8 x i64> %va
+}
diff --git a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
new file mode 100644
index 0000000..bcfcec5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh \
+; RUN:   -verify-machineinstrs --riscv-no-aliases < %s \
+; RUN:   | FileCheck %s
+
+declare half @llvm.riscv.vfmv.f.s.nxv1f16(<vscale x 1 x half>)
+declare float @llvm.riscv.vfmv.f.s.nxv1f32(<vscale x 1 x float>)
+declare double @llvm.riscv.vfmv.f.s.nxv1f64(<vscale x 1 x double>)
+
+declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(half, i64);
+declare <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(float, i64);
+declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(double, i64);
+
+define <vscale x 1 x half> @intrinsic_vfmv.f.s_s_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fsh ft0, 14(sp) # 2-byte Folded Spill
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT:    flh ft0, 14(sp) # 2-byte Folded Reload
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call half @llvm.riscv.vfmv.f.s.nxv1f16(<vscale x 1 x half> %0)
+  tail call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
+  %b = call <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(half %a, i64 %1)
+  ret <vscale x 1 x half> %b
+}
+
+define <vscale x 1 x float> @intrinsic_vfmv.f.s_s_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    vsetvli zero, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call float @llvm.riscv.vfmv.f.s.nxv1f32(<vscale x 1 x float> %0)
+  tail call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
+  %b = call <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(float %a, i64 %1)
+  ret <vscale x 1 x float> %b
+}
+
+define <vscale x 1 x double> @intrinsic_vfmv.f.s_s_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s ft0, v8
+; CHECK-NEXT:    fsd ft0, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    fld ft0, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call double @llvm.riscv.vfmv.f.s.nxv1f64(<vscale x 1 x double> %0)
+  tail call void asm sideeffect "", "~{f0_d},~{f1_d},~{f2_d},~{f3_d},~{f4_d},~{f5_d},~{f6_d},~{f7_d},~{f8_d},~{f9_d},~{f10_d},~{f11_d},~{f12_d},~{f13_d},~{f14_d},~{f15_d},~{f16_d},~{f17_d},~{f18_d},~{f19_d},~{f20_d},~{f21_d},~{f22_d},~{f23_d},~{f24_d},~{f25_d},~{f26_d},~{f27_d},~{f28_d},~{f29_d},~{f30_d},~{f31_d}"()
+  %b = call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(double %a, i64 %1)
+  ret <vscale x 1 x double> %b
+}