|  | ; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s | 
|  | ; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s | 
|  | ; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s | 
|  |  | 
|  | ; External callees with no visible body. Each one is called from a different | 
|  | ; path through the loop (@a in loop_a, @b in loop_b_a, @c in loop_b_b), so | 
|  | ; the three paths can be told apart in the expected output. | 
|  | declare i32 @a() | 
|  | declare i32 @b() | 
|  | declare i32 @c() | 
|  |  | 
|  | ; Non-trivial loop unswitching where there are two distinct | 
|  | ; loop-invariant conditions (%cond1 and %cond2) that could be unswitched | 
|  | ; within the loop. @test1 is a plain function (not an amdgpu_kernel, no | 
|  | ; attributes), so the conditions are divergent and the loop should not be | 
|  | ; unswitched. This is the negative case: the expectations below mirror the | 
|  | ; input IR block for block. | 
|  | define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) { | 
|  | ; CHECK-LABEL: @test1( | 
|  | entry: | 
|  | br label %loop_begin | 
|  | ; The preheader must still fall straight into the loop; no branch on | 
|  | ; %cond1 may be hoisted here. | 
|  | ; CHECK-NEXT:  entry: | 
|  | ; CHECK-NEXT:    br label %loop_begin | 
|  |  | 
|  | loop_begin: | 
|  | br i1 %cond1, label %loop_a, label %loop_b | 
|  | ; The first invariant branch stays inside the loop. | 
|  | ; CHECK: loop_begin: | 
|  | ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b | 
|  |  | 
|  | loop_a: | 
|  | %unused.a = call i32 @a() | 
|  | br label %latch | 
|  | ; CHECK: loop_a: | 
|  | ; CHECK-NEXT: %unused.a = call i32 @a() | 
|  | ; CHECK-NEXT: br label %latch | 
|  |  | 
|  | loop_b: | 
|  | br i1 %cond2, label %loop_b_a, label %loop_b_b | 
|  | ; The second invariant branch stays inside the loop as well. | 
|  | ; CHECK: loop_b: | 
|  | ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b | 
|  |  | 
|  | loop_b_a: | 
|  | %unused.b = call i32 @b() | 
|  | br label %latch | 
|  | ; CHECK: loop_b_a: | 
|  | ; CHECK-NEXT: %unused.b = call i32 @b() | 
|  | ; CHECK-NEXT: br label %latch | 
|  |  | 
|  | loop_b_b: | 
|  | %unused.c = call i32 @c() | 
|  | br label %latch | 
|  | ; CHECK: loop_b_b: | 
|  | ; CHECK-NEXT: %unused.c = call i32 @c() | 
|  | ; CHECK-NEXT: br label %latch | 
|  |  | 
|  | latch: | 
|  | ; The backedge condition is loaded from %ptr on every iteration; it is the | 
|  | ; only branch in the loop that does not depend on a function argument. | 
|  | %v = load i1, ptr %ptr | 
|  | br i1 %v, label %loop_begin, label %loop_exit | 
|  | ; CHECK: latch: | 
|  | ; CHECK-NEXT: %v = load i1, ptr %ptr | 
|  | ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit | 
|  |  | 
|  | loop_exit: | 
|  | ret void | 
|  | ; CHECK: loop_exit: | 
|  | ; CHECK-NEXT: ret void | 
|  | } | 
|  |  | 
|  | ; Non-trivial loop unswitching where there are two distinct | 
|  | ; loop-invariant conditions (%cond1 and %cond2) that could be unswitched | 
|  | ; within the loop. The only difference from @test1 is the amdgpu_kernel | 
|  | ; calling convention: the conditions are known to be uniform, so the loop | 
|  | ; should be unswitchable. However, unswitch currently does not make use of | 
|  | ; UniformityAnalysis, so the expected output is still the unmodified loop. | 
|  | ; If that limitation is lifted, these expectations will need updating. | 
|  | define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) { | 
|  | ; CHECK-LABEL: @test1_uniform( | 
|  | entry: | 
|  | br label %loop_begin | 
|  | ; Current behavior: nothing is hoisted into the preheader. | 
|  | ; CHECK-NEXT:  entry: | 
|  | ; CHECK-NEXT:    br label %loop_begin | 
|  |  | 
|  | loop_begin: | 
|  | br i1 %cond1, label %loop_a, label %loop_b | 
|  | ; CHECK: loop_begin: | 
|  | ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b | 
|  |  | 
|  | loop_a: | 
|  | %unused.a = call i32 @a() | 
|  | br label %latch | 
|  | ; CHECK: loop_a: | 
|  | ; CHECK-NEXT: %unused.a = call i32 @a() | 
|  | ; CHECK-NEXT: br label %latch | 
|  |  | 
|  | loop_b: | 
|  | br i1 %cond2, label %loop_b_a, label %loop_b_b | 
|  | ; CHECK: loop_b: | 
|  | ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b | 
|  |  | 
|  | loop_b_a: | 
|  | %unused.b = call i32 @b() | 
|  | br label %latch | 
|  | ; CHECK: loop_b_a: | 
|  | ; CHECK-NEXT: %unused.b = call i32 @b() | 
|  | ; CHECK-NEXT: br label %latch | 
|  |  | 
|  | loop_b_b: | 
|  | %unused.c = call i32 @c() | 
|  | br label %latch | 
|  | ; CHECK: loop_b_b: | 
|  | ; CHECK-NEXT: %unused.c = call i32 @c() | 
|  | ; CHECK-NEXT: br label %latch | 
|  |  | 
|  | latch: | 
|  | ; The backedge condition is loaded from %ptr on every iteration. | 
|  | %v = load i1, ptr %ptr | 
|  | br i1 %v, label %loop_begin, label %loop_exit | 
|  | ; CHECK: latch: | 
|  | ; CHECK-NEXT: %v = load i1, ptr %ptr | 
|  | ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit | 
|  |  | 
|  | loop_exit: | 
|  | ret void | 
|  | ; CHECK: loop_exit: | 
|  | ; CHECK-NEXT: ret void | 
|  | } | 
|  |  | 
|  | ; Non-trivial loop unswitching where there are two distinct | 
|  | ; loop-invariant conditions (%cond1 and %cond2) to unswitch within the | 
|  | ; loop. There is no divergence because the function carries attribute | 
|  | ; group #0 ("amdgpu-flat-work-group-size"="1,1", defined at the end of the | 
|  | ; file), so it is assumed it can only execute with a workgroup of size 1. | 
|  | ; | 
|  | ; This is the positive case. The expected output hoists both branches out | 
|  | ; of the loop and contains three loop versions: | 
|  | ;   - %cond1 true:                 the '.us' clone that only runs loop_a | 
|  | ;   - %cond1 false, %cond2 true:   the clone that only runs loop_b_a | 
|  | ;   - %cond1 false, %cond2 false:  the original loop, reduced to loop_b_b | 
|  | ; The expectations are interleaved with the input blocks they derive from. | 
|  | define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 { | 
|  | ; CHECK-LABEL: @test1_single_lane_execution( | 
|  | entry: | 
|  | br label %loop_begin | 
|  | ; The branch on %cond1 is expected to be hoisted into the entry block. | 
|  | ; CHECK-NEXT:  entry: | 
|  | ; CHECK-NEXT:    br i1 %cond1, label %entry.split.us, label %entry.split | 
|  |  | 
|  | loop_begin: | 
|  | br i1 %cond1, label %loop_a, label %loop_b | 
|  |  | 
|  | loop_a: | 
|  | call i32 @a() | 
|  | br label %latch | 
|  | ; The 'loop_a' unswitched loop, taken when %cond1 is true. Inside it the | 
|  | ; branch in loop_begin has become unconditional. | 
|  | ; | 
|  | ; CHECK:       entry.split.us: | 
|  | ; CHECK-NEXT:    br label %loop_begin.us | 
|  | ; | 
|  | ; CHECK:       loop_begin.us: | 
|  | ; CHECK-NEXT:    br label %loop_a.us | 
|  | ; | 
|  | ; CHECK:       loop_a.us: | 
|  | ; CHECK-NEXT:    call i32 @a() | 
|  | ; CHECK-NEXT:    br label %latch.us | 
|  | ; | 
|  | ; The loaded value's name is captured in a FileCheck variable instead of | 
|  | ; being spelled out; the variable is redefined for each loop version. | 
|  | ; CHECK:       latch.us: | 
|  | ; CHECK-NEXT:    %[[V:.*]] = load i1, ptr %ptr | 
|  | ; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us | 
|  | ; | 
|  | ; CHECK:       loop_exit.split.us: | 
|  | ; CHECK-NEXT:    br label %loop_exit | 
|  |  | 
|  | loop_b: | 
|  | br i1 %cond2, label %loop_b_a, label %loop_b_b | 
|  | ; The second unswitched condition: on the %cond1-false side, the branch on | 
|  | ; %cond2 is hoisted out of the loop too. | 
|  | ; | 
|  | ; CHECK:       entry.split: | 
|  | ; CHECK-NEXT:    br i1 %cond2, label %entry.split.split.us, label %entry.split.split | 
|  |  | 
|  | loop_b_a: | 
|  | call i32 @b() | 
|  | br label %latch | 
|  | ; The 'loop_b_a' unswitched loop, taken when %cond1 is false and %cond2 is | 
|  | ; true. | 
|  | ; | 
|  | ; CHECK:       entry.split.split.us: | 
|  | ; CHECK-NEXT:    br label %loop_begin.us1 | 
|  | ; | 
|  | ; CHECK:       loop_begin.us1: | 
|  | ; CHECK-NEXT:    br label %loop_b.us | 
|  | ; | 
|  | ; CHECK:       loop_b.us: | 
|  | ; CHECK-NEXT:    br label %loop_b_a.us | 
|  | ; | 
|  | ; CHECK:       loop_b_a.us: | 
|  | ; CHECK-NEXT:    call i32 @b() | 
|  | ; CHECK-NEXT:    br label %latch.us2 | 
|  | ; | 
|  | ; CHECK:       latch.us2: | 
|  | ; CHECK-NEXT:    %[[V:.*]] = load i1, ptr %ptr | 
|  | ; CHECK-NEXT:    br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us | 
|  | ; | 
|  | ; CHECK:       loop_exit.split.split.us: | 
|  | ; CHECK-NEXT:    br label %loop_exit.split | 
|  |  | 
|  | loop_b_b: | 
|  | call i32 @c() | 
|  | br label %latch | 
|  | ; The 'loop_b_b' unswitched loop, taken when both conditions are false. | 
|  | ; It keeps the original block names, with both branches now unconditional. | 
|  | ; | 
|  | ; CHECK:       entry.split.split: | 
|  | ; CHECK-NEXT:    br label %loop_begin | 
|  | ; | 
|  | ; CHECK:       loop_begin: | 
|  | ; CHECK-NEXT:    br label %loop_b | 
|  | ; | 
|  | ; CHECK:       loop_b: | 
|  | ; CHECK-NEXT:    br label %loop_b_b | 
|  | ; | 
|  | ; CHECK:       loop_b_b: | 
|  | ; CHECK-NEXT:    call i32 @c() | 
|  | ; CHECK-NEXT:    br label %latch | 
|  | ; | 
|  | ; CHECK:       latch: | 
|  | ; CHECK-NEXT:    %[[V:.*]] = load i1, ptr %ptr | 
|  | ; CHECK-NEXT:    br i1 %[[V]], label %loop_begin, label %loop_exit.split.split | 
|  | ; | 
|  | ; CHECK:       loop_exit.split.split: | 
|  | ; CHECK-NEXT:    br label %loop_exit.split | 
|  |  | 
|  | latch: | 
|  | ; The backedge condition is loaded from %ptr on every iteration. | 
|  | %v = load i1, ptr %ptr | 
|  | br i1 %v, label %loop_begin, label %loop_exit | 
|  |  | 
|  | loop_exit: | 
|  | ret void | 
|  | ; Both %cond1-false loop versions exit through loop_exit.split; the | 
|  | ; loop_a version branches to loop_exit directly. | 
|  | ; CHECK:       loop_exit.split: | 
|  | ; CHECK-NEXT:    br label %loop_exit | 
|  | ; | 
|  | ; CHECK:       loop_exit: | 
|  | ; CHECK-NEXT:    ret | 
|  | } | 
|  |  | 
|  | ; Attribute group #0, attached only to @test1_single_lane_execution. It sets | 
|  | ; the AMDGPU flat work-group size attribute to "1,1", i.e. a workgroup of | 
|  | ; size 1, which is what that test relies on to rule out divergence. | 
|  | attributes #0 = { "amdgpu-flat-work-group-size"="1,1" } |