; blob: 8a252751165d09a3b47fccee63902100d3033cfd [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O1 -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin -verify-machineinstrs < %s | FileCheck %s
; Twelve zero-initialised, 4-byte-aligned i32 globals. All of them are read by
; @test below (and @a is also written there); no other function in this file
; refers to them.
@a = common global i32 0, align 4
@l = common global i32 0, align 4
@b = common global i32 0, align 4
@c = common global i32 0, align 4
@d = common global i32 0, align 4
@e = common global i32 0, align 4
@k = common global i32 0, align 4
@f = common global i32 0, align 4
@j = common global i32 0, align 4
@g = common global i32 0, align 4
@i = common global i32 0, align 4
@h = common global i32 0, align 4
; This test case benefits from codegen recognising that some values are
; trivially rematerialisable, meaning they are recreated rather than saved to
; the stack and restored. It creates high register pressure to force this
; situation.
define i32 @test() nounwind {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -112
; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: lui s0, %hi(a)
; CHECK-NEXT: lw a0, %lo(a)(s0)
; CHECK-NEXT: beqz a0, .LBB0_11
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: lui s1, %hi(l)
; CHECK-NEXT: lui s2, %hi(k)
; CHECK-NEXT: lui s3, %hi(j)
; CHECK-NEXT: lui s4, %hi(i)
; CHECK-NEXT: lui s5, %hi(d)
; CHECK-NEXT: lui s6, %hi(e)
; CHECK-NEXT: lui s7, %hi(f)
; CHECK-NEXT: lui s8, %hi(g)
; CHECK-NEXT: lui s9, %hi(h)
; CHECK-NEXT: lui s10, %hi(c)
; CHECK-NEXT: lui s11, %hi(b)
; CHECK-NEXT: j .LBB0_3
; CHECK-NEXT: .LBB0_2: # %for.inc
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a0, %lo(a)(s0)
; CHECK-NEXT: addiw a0, a0, -1
; CHECK-NEXT: sw a0, %lo(a)(s0)
; CHECK-NEXT: beqz a0, .LBB0_11
; CHECK-NEXT: .LBB0_3: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a1, %lo(l)(s1)
; CHECK-NEXT: beqz a1, .LBB0_5
; CHECK-NEXT: # %bb.4: # %if.then
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a4, %lo(e)(s6)
; CHECK-NEXT: lw a3, %lo(d)(s5)
; CHECK-NEXT: lw a2, %lo(c)(s10)
; CHECK-NEXT: lw a1, %lo(b)(s11)
; CHECK-NEXT: li a5, 32
; CHECK-NEXT: call foo
; CHECK-NEXT: .LBB0_5: # %if.end
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a0, %lo(k)(s2)
; CHECK-NEXT: beqz a0, .LBB0_7
; CHECK-NEXT: # %bb.6: # %if.then3
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a4, %lo(f)(s7)
; CHECK-NEXT: lw a3, %lo(e)(s6)
; CHECK-NEXT: lw a2, %lo(d)(s5)
; CHECK-NEXT: lw a1, %lo(c)(s10)
; CHECK-NEXT: lw a0, %lo(b)(s11)
; CHECK-NEXT: li a5, 64
; CHECK-NEXT: call foo
; CHECK-NEXT: .LBB0_7: # %if.end5
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a0, %lo(j)(s3)
; CHECK-NEXT: beqz a0, .LBB0_9
; CHECK-NEXT: # %bb.8: # %if.then7
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a4, %lo(g)(s8)
; CHECK-NEXT: lw a3, %lo(f)(s7)
; CHECK-NEXT: lw a2, %lo(e)(s6)
; CHECK-NEXT: lw a1, %lo(d)(s5)
; CHECK-NEXT: lw a0, %lo(c)(s10)
; CHECK-NEXT: li a5, 32
; CHECK-NEXT: call foo
; CHECK-NEXT: .LBB0_9: # %if.end9
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a0, %lo(i)(s4)
; CHECK-NEXT: beqz a0, .LBB0_2
; CHECK-NEXT: # %bb.10: # %if.then11
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: lw a4, %lo(h)(s9)
; CHECK-NEXT: lw a3, %lo(g)(s8)
; CHECK-NEXT: lw a2, %lo(f)(s7)
; CHECK-NEXT: lw a1, %lo(e)(s6)
; CHECK-NEXT: lw a0, %lo(d)(s5)
; CHECK-NEXT: li a5, 32
; CHECK-NEXT: call foo
; CHECK-NEXT: j .LBB0_2
; CHECK-NEXT: .LBB0_11: # %for.end
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
;
; What the expected output above demonstrates: the %hi part of each of the
; twelve global addresses is materialised with a single lui and kept in one of
; s0-s11, and the only stores to the stack are the saves of ra and s0-s11.
;
; Shape of the IR: a loop that runs while @a is non-zero. Each iteration tests
; four flag globals (@l, @k, @j, @i) in turn and, for each non-zero flag, calls
; @foo with freshly loaded global values plus a constant. Every call result is
; ignored and the function always returns 1.
entry:
; Read @a once before the loop; a zero value skips the loop entirely.
%.pr = load i32, ptr @a, align 4
%tobool14 = icmp eq i32 %.pr, 0
br i1 %tobool14, label %for.end, label %for.body
for.body: ; preds = %entry, %for.inc
; %0 is the current counter: the initial @a value on the first iteration and
; the decremented value (%dec) on every later one.
%0 = phi i32 [ %dec, %for.inc ], [ %.pr, %entry ]
%1 = load i32, ptr @l, align 4
%tobool1 = icmp eq i32 %1, 0
br i1 %tobool1, label %if.end, label %if.then
if.then: ; preds = %for.body
; @l != 0: foo(counter, b, c, d, e, 32). This is the only call that passes the
; loop counter itself.
%2 = load i32, ptr @b, align 4
%3 = load i32, ptr @c, align 4
%4 = load i32, ptr @d, align 4
%5 = load i32, ptr @e, align 4
%call = tail call i32 @foo(i32 %0, i32 %2, i32 %3, i32 %4, i32 %5, i32 32)
br label %if.end
if.end: ; preds = %for.body, %if.then
%6 = load i32, ptr @k, align 4
%tobool2 = icmp eq i32 %6, 0
br i1 %tobool2, label %if.end5, label %if.then3
if.then3: ; preds = %if.end
; @k != 0: foo(b, c, d, e, f, 64). The globals are loaded again rather than
; reusing %2-%5, and this is the only call whose last argument is 64.
%7 = load i32, ptr @b, align 4
%8 = load i32, ptr @c, align 4
%9 = load i32, ptr @d, align 4
%10 = load i32, ptr @e, align 4
%11 = load i32, ptr @f, align 4
%call4 = tail call i32 @foo(i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 64)
br label %if.end5
if.end5: ; preds = %if.end, %if.then3
%12 = load i32, ptr @j, align 4
%tobool6 = icmp eq i32 %12, 0
br i1 %tobool6, label %if.end9, label %if.then7
if.then7: ; preds = %if.end5
; @j != 0: foo(c, d, e, f, g, 32).
%13 = load i32, ptr @c, align 4
%14 = load i32, ptr @d, align 4
%15 = load i32, ptr @e, align 4
%16 = load i32, ptr @f, align 4
%17 = load i32, ptr @g, align 4
%call8 = tail call i32 @foo(i32 %13, i32 %14, i32 %15, i32 %16, i32 %17, i32 32)
br label %if.end9
if.end9: ; preds = %if.end5, %if.then7
%18 = load i32, ptr @i, align 4
%tobool10 = icmp eq i32 %18, 0
br i1 %tobool10, label %for.inc, label %if.then11
if.then11: ; preds = %if.end9
; @i != 0: foo(d, e, f, g, h, 32).
%19 = load i32, ptr @d, align 4
%20 = load i32, ptr @e, align 4
%21 = load i32, ptr @f, align 4
%22 = load i32, ptr @g, align 4
%23 = load i32, ptr @h, align 4
%call12 = tail call i32 @foo(i32 %19, i32 %20, i32 %21, i32 %22, i32 %23, i32 32)
br label %for.inc
for.inc: ; preds = %if.end9, %if.then11
; Re-read @a from memory (it is not carried over from %0), decrement it, write
; it back, and leave the loop once it reaches zero.
%24 = load i32, ptr @a, align 4
%dec = add nsw i32 %24, -1
store i32 %dec, ptr @a, align 4
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %for.end, label %for.body
for.end: ; preds = %for.inc, %entry
ret i32 1
}
; External callee used by @test: takes six i32 arguments and returns an i32
; (which @test never uses). No definition is provided in this file.
declare i32 @foo(i32, i32, i32, i32, i32, i32)
define void @remat_load(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, i8 %stackarg0, i16 %stackarg1, i32 %stackarg2, i64 %stackarg3, half %stackarg4, bfloat %stackarg5, float %stackarg6, double %stackarg7, ptr %p) nounwind {
; CHECK-LABEL: remat_load:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -208
; CHECK-NEXT: sd ra, 200(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 192(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 184(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 176(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 168(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 160(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 152(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 144(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 136(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 128(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: fsd fs11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: fld fa5, 264(sp)
; CHECK-NEXT: flw fa4, 256(sp)
; CHECK-NEXT: flh fa3, 248(sp)
; CHECK-NEXT: flh fa2, 240(sp)
; CHECK-NEXT: ld a0, 272(sp)
; CHECK-NEXT: lbu a4, 208(sp)
; CHECK-NEXT: lh a3, 216(sp)
; CHECK-NEXT: lw a2, 224(sp)
; CHECK-NEXT: ld a1, 232(sp)
; CHECK-NEXT: sb a4, 0(a0)
; CHECK-NEXT: sh a3, 0(a0)
; CHECK-NEXT: sw a2, 0(a0)
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: fsh fa2, 0(a0)
; CHECK-NEXT: fsh fa3, 0(a0)
; CHECK-NEXT: fsw fa4, 0(a0)
; CHECK-NEXT: fsd fa5, 0(a0)
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ld a0, 272(sp)
; CHECK-NEXT: lbu a1, 208(sp)
; CHECK-NEXT: sb a1, 0(a0)
; CHECK-NEXT: lh a1, 216(sp)
; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: lw a1, 224(sp)
; CHECK-NEXT: sw a1, 0(a0)
; CHECK-NEXT: ld a1, 232(sp)
; CHECK-NEXT: sd a1, 0(a0)
; CHECK-NEXT: flh fa5, 240(sp)
; CHECK-NEXT: fsh fa5, 0(a0)
; CHECK-NEXT: flh fa5, 248(sp)
; CHECK-NEXT: fsh fa5, 0(a0)
; CHECK-NEXT: flw fa5, 256(sp)
; CHECK-NEXT: fsw fa5, 0(a0)
; CHECK-NEXT: fld fa5, 264(sp)
; CHECK-NEXT: fsd fa5, 0(a0)
; CHECK-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 184(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 176(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 168(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 160(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 152(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 144(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 136(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 128(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 120(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 112(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: fld fs11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 208
; CHECK-NEXT: ret
;
; Arguments %0-%7 (i32) and %8-%15 (double) are never used in the body.
; Presumably they exist to occupy the eight integer and eight floating-point
; argument registers so that %stackarg0-%stackarg7 and %p arrive on the stack;
; the expected output is consistent with that, reading them from 208(sp)
; through 272(sp), i.e. just above this function's 208-byte frame.
;
; What the expected output above demonstrates: after the inline asm each
; stack argument (and %p) is loaded again from its incoming stack slot right
; before its store, and the only stores into the frame are the saves of ra,
; s0-s11 and fs0-fs11. Both the half and the bfloat argument are moved with
; flh/fsh.
entry:
; Add a use of the stack arguments here so that we will have to load them from
; the stack before the inline asm. Otherwise we would be exercising the
; machine scheduler, not rematerialization.
store volatile i8 %stackarg0, ptr %p
store volatile i16 %stackarg1, ptr %p
store volatile i32 %stackarg2, ptr %p
store volatile i64 %stackarg3, ptr %p
store volatile half %stackarg4, ptr %p
store volatile bfloat %stackarg5, ptr %p
store volatile float %stackarg6, ptr %p
store volatile double %stackarg7, ptr %p
; Empty inline asm whose clobber list names x1, x3-x31 and f0-f31: every
; integer register except x0 and x2, and every floating-point register. No
; value can therefore stay in a register across this point.
tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
; Now use them after spilling everything to force rematerialization.
store volatile i8 %stackarg0, ptr %p
store volatile i16 %stackarg1, ptr %p
store volatile i32 %stackarg2, ptr %p
store volatile i64 %stackarg3, ptr %p
store volatile half %stackarg4, ptr %p
store volatile bfloat %stackarg5, ptr %p
store volatile float %stackarg6, ptr %p
store volatile double %stackarg7, ptr %p
ret void
}
; We could remat the load of the constant global if we extended the live
; interval of the high bits of the address.
; @const is the global referred to above: an i32 declared `external constant`,
; so it is defined elsewhere and is loaded in this file by @constglobal_load.
@const = external constant i32
define i32 @constglobal_load() nounwind {
; CHECK-LABEL: constglobal_load:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -112
; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(const)
; CHECK-NEXT: lw a0, %lo(const)(a0)
; CHECK-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: addiw a0, a0, 1
; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 112
; CHECK-NEXT: ret
;
; What the expected output above records: the value loaded from @const is
; stored to 0(sp) before the inline asm and read back from 0(sp) after it; the
; load from @const itself is not repeated.
entry:
; Load the constant global before the clobbering asm.
%global = load i32, ptr @const
; Empty inline asm whose clobber list names x1 and x3-x31 (every integer
; register except x0 and x2); no floating-point registers are listed here.
tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
; Use the loaded value after the asm so that it must survive the clobbers.
%a = add i32 %global, 1
ret i32 %a
}