; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN: | FileCheck %s -check-prefix=RV64IV
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
; RUN: | FileCheck %s -check-prefix=RV32IV
; Tests adapted from AArch64.
; Test prolog sequences for stack probing when vector objects are involved.
; The space for vector objects needs probing in the general case, because
; the stack adjustment may happen to be too big (i.e. greater than the
; probe size).
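;
; All the probed prologues below share one shape (a sketch distilled from the
; RV64 checks; the RV32 variants differ only in using sw instead of sd):
;
;   csrr t1, vlenb          # t1 = total scalable frame size
;   slli t1, t1, N          #    = (size in vlenb units) * vlenb
;   lui  t2, 1              # t2 = 4096, the probe size
; .LBBx_1:
;   sub  sp, sp, t2         # extend the stack by one probe size
;   sd   zero, 0(sp)        # probe (touch) the newly exposed page
;   sub  t1, t1, t2
;   bge  t1, t2, .LBBx_1    # a full probe size still left to allocate?
;   sub  sp, sp, t1         # allocate the remainder (< 4096, needs no probe)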
define void @f_vector(ptr %out) #0 {
; RV64IV-LABEL: f_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 1
; RV64IV-NEXT: .cfi_def_cfa t1, -16
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB0_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB0_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 1
; RV32IV-NEXT: .cfi_def_cfa t1, -16
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB0_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB0_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 1
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
%vec = alloca <vscale x 4 x float>, align 16
ret void
}
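; For reference, the .cfi_escape bytes above decode to a DWARF CFA expression
; (register 7202 = 4096 + 0xc22 is, as I understand LLVM's convention, the
; DWARF encoding of the vlenb CSR):
;   0x0f                  DW_CFA_def_cfa_expression
;   0x0a                  expression length (10 bytes)
;   0x72 0x00             DW_OP_breg2 (sp) + 0
;   0x11 0x02             DW_OP_consts 2
;   0x92 0xa2 0x38 0x00   DW_OP_bregx 7202 (vlenb) + 0
;   0x1e                  DW_OP_mul
;   0x22                  DW_OP_plus          => CFA = sp + 2 * vlenb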
; As above, but with 4 vectors of stack space.
define void @f4_vector(ptr %out) #0 {
; RV64IV-LABEL: f4_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 3
; RV64IV-NEXT: .cfi_def_cfa t1, -64
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB1_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB1_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 3
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f4_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 3
; RV32IV-NEXT: .cfi_def_cfa t1, -64
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB1_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB1_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 3
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
%vec1 = alloca <vscale x 4 x float>, align 16
%vec2 = alloca <vscale x 4 x float>, align 16
%vec3 = alloca <vscale x 4 x float>, align 16
%vec4 = alloca <vscale x 4 x float>, align 16
ret void
}
; As above, but with 16 vectors of stack space.
; On AArch64, where this test originates, the adjustment is at most
; 16 x 256 = 4096 bytes (one probe size), so the locals can be allocated
; at once there; vlenb has no such compile-time bound here, so the probe
; loop is still emitted.
define void @f16_vector(ptr %out) #0 {
; RV64IV-LABEL: f16_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 5
; RV64IV-NEXT: .cfi_def_cfa t1, -256
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB2_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB2_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 5
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f16_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 5
; RV32IV-NEXT: .cfi_def_cfa t1, -256
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB2_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB2_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 5
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
%vec1 = alloca <vscale x 4 x float>, align 16
%vec2 = alloca <vscale x 4 x float>, align 16
%vec3 = alloca <vscale x 4 x float>, align 16
%vec4 = alloca <vscale x 4 x float>, align 16
%vec5 = alloca <vscale x 4 x float>, align 16
%vec6 = alloca <vscale x 4 x float>, align 16
%vec7 = alloca <vscale x 4 x float>, align 16
%vec8 = alloca <vscale x 4 x float>, align 16
%vec9 = alloca <vscale x 4 x float>, align 16
%vec10 = alloca <vscale x 4 x float>, align 16
%vec11 = alloca <vscale x 4 x float>, align 16
%vec12 = alloca <vscale x 4 x float>, align 16
%vec13 = alloca <vscale x 4 x float>, align 16
%vec14 = alloca <vscale x 4 x float>, align 16
%vec15 = alloca <vscale x 4 x float>, align 16
%vec16 = alloca <vscale x 4 x float>, align 16
ret void
}
; As above, but with 17 vectors of stack space.
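; (Each <vscale x 4 x float> occupies 2 x vlenb bytes, so 17 of them need
; 34 x vlenb; 34 is not a power of two, hence the li+mul below instead of a
; single slli.)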
define void @f17_vector(ptr %out) #0 {
; RV64IV-LABEL: f17_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: li a0, 34
; RV64IV-NEXT: mul t1, t1, a0
; RV64IV-NEXT: .cfi_def_cfa t1, -272
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB3_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB3_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: li a1, 34
; RV64IV-NEXT: mul a0, a0, a1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f17_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: li a0, 34
; RV32IV-NEXT: mul t1, t1, a0
; RV32IV-NEXT: .cfi_def_cfa t1, -272
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB3_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB3_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: li a1, 34
; RV32IV-NEXT: mul a0, a0, a1
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
%vec1 = alloca <vscale x 4 x float>, align 16
%vec2 = alloca <vscale x 4 x float>, align 16
%vec3 = alloca <vscale x 4 x float>, align 16
%vec4 = alloca <vscale x 4 x float>, align 16
%vec5 = alloca <vscale x 4 x float>, align 16
%vec6 = alloca <vscale x 4 x float>, align 16
%vec7 = alloca <vscale x 4 x float>, align 16
%vec8 = alloca <vscale x 4 x float>, align 16
%vec9 = alloca <vscale x 4 x float>, align 16
%vec10 = alloca <vscale x 4 x float>, align 16
%vec11 = alloca <vscale x 4 x float>, align 16
%vec12 = alloca <vscale x 4 x float>, align 16
%vec13 = alloca <vscale x 4 x float>, align 16
%vec14 = alloca <vscale x 4 x float>, align 16
%vec15 = alloca <vscale x 4 x float>, align 16
%vec16 = alloca <vscale x 4 x float>, align 16
%vec17 = alloca <vscale x 4 x float>, align 16
ret void
}
; A vector and a 16-byte fixed-size object.
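; The 16-byte fixed part is smaller than the probe size, so (as the checks
; show) it is allocated with a plain addi and only the scalable part goes
; through the probe loop.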
define void @f1_vector_16_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_16_arr:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: addi sp, sp, -16
; RV64IV-NEXT: .cfi_def_cfa_offset 16
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 1
; RV64IV-NEXT: .cfi_def_cfa t1, -16
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB4_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB4_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 16
; RV64IV-NEXT: addi sp, sp, 16
; RV64IV-NEXT: .cfi_def_cfa_offset 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f1_vector_16_arr:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: addi sp, sp, -16
; RV32IV-NEXT: .cfi_def_cfa_offset 16
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 1
; RV32IV-NEXT: .cfi_def_cfa t1, -16
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB4_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB4_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 1
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 16
; RV32IV-NEXT: addi sp, sp, 16
; RV32IV-NEXT: .cfi_def_cfa_offset 0
; RV32IV-NEXT: ret
entry:
%vec = alloca <vscale x 4 x float>, align 16
%arr = alloca i8, i64 16, align 1
ret void
}
; A large vector object and a large slot, both of which need probing.
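; The fixed part (12288 + 16 = 12304 bytes) is a compile-time constant, so it
; is probed with unrolled 4096-byte chunks plus an unprobed 16-byte tail; the
; scalable 128 * vlenb part then goes through the usual probe loop.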
define void @f1_vector_4096_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_4096_arr:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: lui a0, 1
; RV64IV-NEXT: sub sp, sp, a0
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: .cfi_def_cfa_offset 4096
; RV64IV-NEXT: lui a0, 1
; RV64IV-NEXT: sub sp, sp, a0
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: .cfi_def_cfa_offset 8192
; RV64IV-NEXT: lui a0, 1
; RV64IV-NEXT: sub sp, sp, a0
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: .cfi_def_cfa_offset 12288
; RV64IV-NEXT: addi sp, sp, -16
; RV64IV-NEXT: .cfi_def_cfa_offset 12304
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 7
; RV64IV-NEXT: .cfi_def_cfa t1, -1024
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB5_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB5_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 7
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 12304
; RV64IV-NEXT: lui a0, 3
; RV64IV-NEXT: addi a0, a0, 16
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa_offset 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f1_vector_4096_arr:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: lui a0, 1
; RV32IV-NEXT: sub sp, sp, a0
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: .cfi_def_cfa_offset 4096
; RV32IV-NEXT: lui a0, 1
; RV32IV-NEXT: sub sp, sp, a0
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: .cfi_def_cfa_offset 8192
; RV32IV-NEXT: lui a0, 1
; RV32IV-NEXT: sub sp, sp, a0
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: .cfi_def_cfa_offset 12288
; RV32IV-NEXT: addi sp, sp, -16
; RV32IV-NEXT: .cfi_def_cfa_offset 12304
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 7
; RV32IV-NEXT: .cfi_def_cfa t1, -1024
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB5_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB5_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 7
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 12304
; RV32IV-NEXT: lui a0, 3
; RV32IV-NEXT: addi a0, a0, 16
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa_offset 0
; RV32IV-NEXT: ret
entry:
%vec = alloca <vscale x 256 x float>, align 16
%arr = alloca i8, i64 12288, align 1
ret void
}
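; Note on the attributes: "probe-stack"="inline-asm" selects inline stack
; probes (as opposed to calling a probe function), and uwtable(async) requests
; the precise prologue CFI these checks assert on.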
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }