| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \ |
| ; RUN: | FileCheck %s -check-prefix=RV64IV |
| ; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \ |
| ; RUN: | FileCheck %s -check-prefix=RV32IV |
| |
| ; Tests adapted from AArch64. |
| |
| ; Test prolog sequences for stack probing when vector is involved. |
| |
| ; The space for vector objects needs probing in the general case, because |
| ; the stack adjustment may happen to be too big (i.e. greater than the |
| ; probe size). |
| |
define void @f_vector(ptr %out) #0 {
; RV64IV-LABEL: f_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 1
; RV64IV-NEXT: .cfi_def_cfa t1, -16
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB0_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB0_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 1
; RV32IV-NEXT: .cfi_def_cfa t1, -16
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB0_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB0_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 2 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 1
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
; A single <vscale x 4 x float> object occupies 2 * vlenb bytes (see the
; "slli t1, t1, 1" scaling of vlenb above). The expected prologue probes the
; scalable allocation one page at a time (lui t2, 1 => 4096-byte steps),
; storing zero at each new stack top, then handles the sub-page remainder.
%vec = alloca <vscale x 4 x float>, align 16
ret void
}
| |
| ; As above, but with 4 vectors of stack space. |
define void @f4_vector(ptr %out) #0 {
; RV64IV-LABEL: f4_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 3
; RV64IV-NEXT: .cfi_def_cfa t1, -64
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB1_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB1_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 3
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f4_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 3
; RV32IV-NEXT: .cfi_def_cfa t1, -64
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB1_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB1_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 8 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 3
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
; Four 2*vlenb vectors => 8 * vlenb bytes of scalable stack (slli t1, t1, 3
; above); the prologue still probes it in 4096-byte (lui t2, 1) steps.
%vec1 = alloca <vscale x 4 x float>, align 16
%vec2 = alloca <vscale x 4 x float>, align 16
%vec3 = alloca <vscale x 4 x float>, align 16
%vec4 = alloca <vscale x 4 x float>, align 16
ret void
}
| |
; As above, but with 16 vectors of stack space (32 * vlenb bytes).
; Note: the original AArch64 comment claimed the locals could be allocated
; at once here; on RISC-V the scalable size is not bounded at compile time,
; so (as the checks below show) a probe loop is still emitted.
define void @f16_vector(ptr %out) #0 {
; RV64IV-LABEL: f16_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 5
; RV64IV-NEXT: .cfi_def_cfa t1, -256
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB2_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB2_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 5
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f16_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 5
; RV32IV-NEXT: .cfi_def_cfa t1, -256
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB2_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB2_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 5
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
; 16 vectors => 32 * vlenb bytes of scalable stack (slli t1, t1, 5 above).
; The checks show a probe loop is still used for this size.
%vec1 = alloca <vscale x 4 x float>, align 16
%vec2 = alloca <vscale x 4 x float>, align 16
%vec3 = alloca <vscale x 4 x float>, align 16
%vec4 = alloca <vscale x 4 x float>, align 16
%vec5 = alloca <vscale x 4 x float>, align 16
%vec6 = alloca <vscale x 4 x float>, align 16
%vec7 = alloca <vscale x 4 x float>, align 16
%vec8 = alloca <vscale x 4 x float>, align 16
%vec9 = alloca <vscale x 4 x float>, align 16
%vec10 = alloca <vscale x 4 x float>, align 16
%vec11 = alloca <vscale x 4 x float>, align 16
%vec12 = alloca <vscale x 4 x float>, align 16
%vec13 = alloca <vscale x 4 x float>, align 16
%vec14 = alloca <vscale x 4 x float>, align 16
%vec15 = alloca <vscale x 4 x float>, align 16
%vec16 = alloca <vscale x 4 x float>, align 16
ret void
}
| |
| ; As above, but with 17 vectors of stack space. |
define void @f17_vector(ptr %out) #0 {
; RV64IV-LABEL: f17_vector:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: li a0, 34
; RV64IV-NEXT: mul t1, t1, a0
; RV64IV-NEXT: .cfi_def_cfa t1, -272
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB3_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB3_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: li a1, 34
; RV64IV-NEXT: mul a0, a0, a1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f17_vector:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: li a0, 34
; RV32IV-NEXT: mul t1, t1, a0
; RV32IV-NEXT: .cfi_def_cfa t1, -272
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB3_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB3_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0a, 0x72, 0x00, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 34 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: li a1, 34
; RV32IV-NEXT: mul a0, a0, a1
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 0
; RV32IV-NEXT: ret
entry:
; 17 vectors => 34 * vlenb bytes. 34 is not a power of two, so the size is
; computed with li + mul instead of slli in both prologue and epilogue.
%vec1 = alloca <vscale x 4 x float>, align 16
%vec2 = alloca <vscale x 4 x float>, align 16
%vec3 = alloca <vscale x 4 x float>, align 16
%vec4 = alloca <vscale x 4 x float>, align 16
%vec5 = alloca <vscale x 4 x float>, align 16
%vec6 = alloca <vscale x 4 x float>, align 16
%vec7 = alloca <vscale x 4 x float>, align 16
%vec8 = alloca <vscale x 4 x float>, align 16
%vec9 = alloca <vscale x 4 x float>, align 16
%vec10 = alloca <vscale x 4 x float>, align 16
%vec11 = alloca <vscale x 4 x float>, align 16
%vec12 = alloca <vscale x 4 x float>, align 16
%vec13 = alloca <vscale x 4 x float>, align 16
%vec14 = alloca <vscale x 4 x float>, align 16
%vec15 = alloca <vscale x 4 x float>, align 16
%vec16 = alloca <vscale x 4 x float>, align 16
%vec17 = alloca <vscale x 4 x float>, align 16
ret void
}
| |
| ; A vector and a 16-byte fixed size object. |
define void @f1_vector_16_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_16_arr:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: addi sp, sp, -16
; RV64IV-NEXT: .cfi_def_cfa_offset 16
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 1
; RV64IV-NEXT: .cfi_def_cfa t1, -16
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB4_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB4_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 1
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 16
; RV64IV-NEXT: addi sp, sp, 16
; RV64IV-NEXT: .cfi_def_cfa_offset 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f1_vector_16_arr:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: addi sp, sp, -16
; RV32IV-NEXT: .cfi_def_cfa_offset 16
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 1
; RV32IV-NEXT: .cfi_def_cfa t1, -16
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB4_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB4_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 1
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 16
; RV32IV-NEXT: addi sp, sp, 16
; RV32IV-NEXT: .cfi_def_cfa_offset 0
; RV32IV-NEXT: ret
entry:
; The 16-byte fixed-size object is allocated with a plain "addi sp, sp, -16"
; (no probing needed for a sub-page fixed amount); only the scalable
; 2 * vlenb part goes through the probe loop.
%vec = alloca <vscale x 4 x float>, align 16
%arr = alloca i8, i64 16, align 1
ret void
}
| |
| ; A large vector object and a large slot, both of which need probing. |
define void @f1_vector_4096_arr(ptr %out) #0 {
; RV64IV-LABEL: f1_vector_4096_arr:
; RV64IV: # %bb.0: # %entry
; RV64IV-NEXT: lui a0, 1
; RV64IV-NEXT: sub sp, sp, a0
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: .cfi_def_cfa_offset 4096
; RV64IV-NEXT: lui a0, 1
; RV64IV-NEXT: sub sp, sp, a0
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: .cfi_def_cfa_offset 8192
; RV64IV-NEXT: lui a0, 1
; RV64IV-NEXT: sub sp, sp, a0
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: .cfi_def_cfa_offset 12288
; RV64IV-NEXT: addi sp, sp, -16
; RV64IV-NEXT: .cfi_def_cfa_offset 12304
; RV64IV-NEXT: csrr t1, vlenb
; RV64IV-NEXT: slli t1, t1, 7
; RV64IV-NEXT: .cfi_def_cfa t1, -1024
; RV64IV-NEXT: lui t2, 1
; RV64IV-NEXT: .LBB5_1: # %entry
; RV64IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IV-NEXT: sub sp, sp, t2
; RV64IV-NEXT: sd zero, 0(sp)
; RV64IV-NEXT: sub t1, t1, t2
; RV64IV-NEXT: bge t1, t2, .LBB5_1
; RV64IV-NEXT: # %bb.2: # %entry
; RV64IV-NEXT: .cfi_def_cfa_register sp
; RV64IV-NEXT: sub sp, sp, t1
; RV64IV-NEXT: .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV64IV-NEXT: csrr a0, vlenb
; RV64IV-NEXT: slli a0, a0, 7
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa sp, 12304
; RV64IV-NEXT: lui a0, 3
; RV64IV-NEXT: addi a0, a0, 16
; RV64IV-NEXT: add sp, sp, a0
; RV64IV-NEXT: .cfi_def_cfa_offset 0
; RV64IV-NEXT: ret
;
; RV32IV-LABEL: f1_vector_4096_arr:
; RV32IV: # %bb.0: # %entry
; RV32IV-NEXT: lui a0, 1
; RV32IV-NEXT: sub sp, sp, a0
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: .cfi_def_cfa_offset 4096
; RV32IV-NEXT: lui a0, 1
; RV32IV-NEXT: sub sp, sp, a0
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: .cfi_def_cfa_offset 8192
; RV32IV-NEXT: lui a0, 1
; RV32IV-NEXT: sub sp, sp, a0
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: .cfi_def_cfa_offset 12288
; RV32IV-NEXT: addi sp, sp, -16
; RV32IV-NEXT: .cfi_def_cfa_offset 12304
; RV32IV-NEXT: csrr t1, vlenb
; RV32IV-NEXT: slli t1, t1, 7
; RV32IV-NEXT: .cfi_def_cfa t1, -1024
; RV32IV-NEXT: lui t2, 1
; RV32IV-NEXT: .LBB5_1: # %entry
; RV32IV-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IV-NEXT: sub sp, sp, t2
; RV32IV-NEXT: sw zero, 0(sp)
; RV32IV-NEXT: sub t1, t1, t2
; RV32IV-NEXT: bge t1, t2, .LBB5_1
; RV32IV-NEXT: # %bb.2: # %entry
; RV32IV-NEXT: .cfi_def_cfa_register sp
; RV32IV-NEXT: sub sp, sp, t1
; RV32IV-NEXT: .cfi_escape 0x0f, 0x10, 0x72, 0x00, 0x11, 0x90, 0xe0, 0x00, 0x22, 0x11, 0x80, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 12304 + 128 * vlenb
; RV32IV-NEXT: csrr a0, vlenb
; RV32IV-NEXT: slli a0, a0, 7
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa sp, 12304
; RV32IV-NEXT: lui a0, 3
; RV32IV-NEXT: addi a0, a0, 16
; RV32IV-NEXT: add sp, sp, a0
; RV32IV-NEXT: .cfi_def_cfa_offset 0
; RV32IV-NEXT: ret
entry:
; The 12288-byte fixed array is probed with three unrolled page-sized steps
; (lui a0, 1 => 4096 each, with a zero store after each), plus a 16-byte
; adjustment for the vector object's alignment padding (total 12304). The
; scalable part (<vscale x 256 x float> = 128 * vlenb bytes, see
; "slli t1, t1, 7") is then probed in a loop as in the cases above.
%vec = alloca <vscale x 256 x float>, align 16
%arr = alloca i8, i64 12288, align 1
ret void
}
| |
| attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" } |