; blob: 97d3d4f11e678a6e8797405a4c16310cd3c6c44f [file] [log] [blame]
; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s
; External functions with no body in this file. Each test function below
; calls one of them on a different path through its loop and ignores the
; returned i32, so they serve only to tell the three paths apart.
declare i32 @a()
declare i32 @b()
declare i32 @c()
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. The conditions are divergent
; and should not unswitch.
;
; %cond1 and %cond2 are function arguments, so both branch conditions are
; invariant inside the loop. The check lines after each block repeat that
; block's instructions exactly as written in the input, i.e. the expected
; output keeps the original loop shape.
; NOTE(review): presumably the arguments count as divergent because this
; definition is not an amdgpu_kernel entry point (compare @test1_uniform)
; -- confirm against the target's divergence rules.
define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1(
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
loop_begin:
; First invariant condition: picks the @a path or the nested %cond2 branch.
br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
%unused.a = call i32 @a()
br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
loop_b:
; Second invariant condition, reached only when %cond1 is false.
br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
latch:
; The backedge condition is reloaded from %ptr on every iteration; the loop
; repeats while the loaded i1 is true and leaves to %loop_exit otherwise.
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. The conditions are known to
; be uniform, so it should be unswitchable. However, unswitch
; currently does not make use of UniformityAnalysis.
;
; The body is the same as @test1; the only difference in the definition is
; the amdgpu_kernel calling convention. In line with the limitation noted
; above, the check lines still repeat every block unchanged, i.e. they
; expect that no unswitching takes place.
define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) {
; CHECK-LABEL: @test1_uniform(
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
loop_begin:
; First invariant condition (kernel argument %cond1).
br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
%unused.a = call i32 @a()
br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
loop_b:
; Second invariant condition (kernel argument %cond2), reached only when
; %cond1 is false.
br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
latch:
; Backedge condition is loaded from %ptr each iteration; loop while true.
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. There is no divergence
; because it's assumed it can only execute with a workgroup of size 1.
;
; The body is the same as @test1; the only difference in the definition is
; attribute group #0, which sets "amdgpu-flat-work-group-size"="1,1".
; NOTE(review): although the comment above says there is no divergence, the
; check lines below still repeat every block unchanged, i.e. they expect
; that no unswitching takes place. Confirm this is the intended
; expectation for the single-lane case.
define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
; CHECK-LABEL: @test1_single_lane_execution(
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: br label %loop_begin
loop_begin:
; First invariant condition (argument %cond1).
br i1 %cond1, label %loop_a, label %loop_b
; CHECK: loop_begin:
; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
%unused.a = call i32 @a()
br label %latch
; CHECK: loop_a:
; CHECK-NEXT: %unused.a = call i32 @a()
; CHECK-NEXT: br label %latch
loop_b:
; Second invariant condition (argument %cond2), reached only when %cond1
; is false.
br i1 %cond2, label %loop_b_a, label %loop_b_b
; CHECK: loop_b:
; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
; CHECK: loop_b_a:
; CHECK-NEXT: %unused.b = call i32 @b()
; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
; CHECK: loop_b_b:
; CHECK-NEXT: %unused.c = call i32 @c()
; CHECK-NEXT: br label %latch
latch:
; Backedge condition is loaded from %ptr each iteration; loop while true.
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %v = load i1, ptr %ptr
; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
; CHECK: loop_exit:
; CHECK-NEXT: ret void
}
; Attribute group applied to @test1_single_lane_execution. That test's
; header comment describes the effect as execution with a workgroup of
; size 1.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }