| ; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s |
| ; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s |
| ; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s |
; The three invocations above all use the amdgcn-- triple and pipe into
; FileCheck without a custom prefix, so all three configurations are held to
; the same expected output:
;   1. simple-loop-unswitch<nontrivial> wrapped in `loop(...)`, then verify<loops>;
;   2. the same pass wrapped in `loop-mssa(...)`, then verify<loops>;
;   3. the pass named directly in the pipeline, with -verify-memoryssa enabled.
| |
; External functions with no body in this file. In every loop below, @a is
; called from loop_a, @b from loop_b_a and @c from loop_b_b; the i32 result is
; never used.
| declare i32 @a() |
| declare i32 @b() |
| declare i32 @c() |
| |
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. The conditions are divergent,
; so the loop should not be unswitched.
; @test1 is defined with no calling-convention keyword and no attribute group.
;   %ptr   - location the latch reloads the loop-continue flag from.
;   %cond1 - picks loop_a or loop_b; a function argument, so it never changes
;            inside the loop.
;   %cond2 - picks loop_b_a or loop_b_b; likewise an unchanging argument.
; The directives below repeat the input instructions verbatim: entry must still
; branch straight to loop_begin, and both conditional branches must still be
; inside the loop, i.e. the expected output is the original loop.
| define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) { |
| ; CHECK-LABEL: @test1( |
| entry: |
| br label %loop_begin |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label %loop_begin |
| |
; First candidate condition: branch on the argument %cond1.
| loop_begin: |
| br i1 %cond1, label %loop_a, label %loop_b |
| ; CHECK: loop_begin: |
| ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b |
| |
| loop_a: |
| %unused.a = call i32 @a() |
| br label %latch |
| ; CHECK: loop_a: |
| ; CHECK-NEXT: %unused.a = call i32 @a() |
| ; CHECK-NEXT: br label %latch |
| |
; Second candidate condition: only reached when %cond1 is false.
| loop_b: |
| br i1 %cond2, label %loop_b_a, label %loop_b_b |
| ; CHECK: loop_b: |
| ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b |
| |
| loop_b_a: |
| %unused.b = call i32 @b() |
| br label %latch |
| ; CHECK: loop_b_a: |
| ; CHECK-NEXT: %unused.b = call i32 @b() |
| ; CHECK-NEXT: br label %latch |
| |
| loop_b_b: |
| %unused.c = call i32 @c() |
| br label %latch |
| ; CHECK: loop_b_b: |
| ; CHECK-NEXT: %unused.c = call i32 @c() |
| ; CHECK-NEXT: br label %latch |
| |
; All three arms rejoin here. The continue flag is loaded from %ptr on every
; iteration; true takes the back edge to loop_begin, false leaves the loop.
| latch: |
| %v = load i1, ptr %ptr |
| br i1 %v, label %loop_begin, label %loop_exit |
| ; CHECK: latch: |
| ; CHECK-NEXT: %v = load i1, ptr %ptr |
| ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit |
| |
| loop_exit: |
| ret void |
| ; CHECK: loop_exit: |
| ; CHECK-NEXT: ret void |
| } |
| |
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. The conditions are known to
; be uniform, so the loop should be unswitchable. However, loop unswitching
; currently does not make use of UniformityAnalysis, so the loop is left as is.
; @test1_uniform differs from @test1 only in its amdgpu_kernel calling
; convention; the body and the expected output are otherwise line-for-line the
; same.
;   %ptr   - location the latch reloads the loop-continue flag from.
;   %cond1 - picks loop_a or loop_b; a kernel argument that never changes
;            inside the loop.
;   %cond2 - picks loop_b_a or loop_b_b; likewise an unchanging argument.
; The directives below expect entry to still branch straight to loop_begin and
; both conditional branches to remain inside the loop (no unswitching).
| define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) { |
| ; CHECK-LABEL: @test1_uniform( |
| entry: |
| br label %loop_begin |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label %loop_begin |
| |
; First candidate condition: branch on the argument %cond1.
| loop_begin: |
| br i1 %cond1, label %loop_a, label %loop_b |
| ; CHECK: loop_begin: |
| ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b |
| |
| loop_a: |
| %unused.a = call i32 @a() |
| br label %latch |
| ; CHECK: loop_a: |
| ; CHECK-NEXT: %unused.a = call i32 @a() |
| ; CHECK-NEXT: br label %latch |
| |
; Second candidate condition: only reached when %cond1 is false.
| loop_b: |
| br i1 %cond2, label %loop_b_a, label %loop_b_b |
| ; CHECK: loop_b: |
| ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b |
| |
| loop_b_a: |
| %unused.b = call i32 @b() |
| br label %latch |
| ; CHECK: loop_b_a: |
| ; CHECK-NEXT: %unused.b = call i32 @b() |
| ; CHECK-NEXT: br label %latch |
| |
| loop_b_b: |
| %unused.c = call i32 @c() |
| br label %latch |
| ; CHECK: loop_b_b: |
| ; CHECK-NEXT: %unused.c = call i32 @c() |
| ; CHECK-NEXT: br label %latch |
| |
; All three arms rejoin here. The continue flag is loaded from %ptr on every
; iteration; true takes the back edge to loop_begin, false leaves the loop.
| latch: |
| %v = load i1, ptr %ptr |
| br i1 %v, label %loop_begin, label %loop_exit |
| ; CHECK: latch: |
| ; CHECK-NEXT: %v = load i1, ptr %ptr |
| ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit |
| |
| loop_exit: |
| ret void |
| ; CHECK: loop_exit: |
| ; CHECK-NEXT: ret void |
| } |
| |
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. There is no divergence
; because the function is assumed to only execute with a workgroup of size 1.
; @test1_single_lane_execution differs from @test1 only in carrying attribute
; group #0 ("amdgpu-flat-work-group-size"="1,1", defined below the function).
;   %ptr   - location the latch reloads the loop-continue flag from.
;   %cond1 - picks loop_a or loop_b; a function argument, so it never changes
;            inside the loop.
;   %cond2 - picks loop_b_a or loop_b_b; likewise an unchanging argument.
; NOTE(review): the description above says there is no divergence here, yet the
; directives below expect the same un-unswitched loop as @test1 (entry still
; branches straight to loop_begin). Confirm whether this is an intentional
; baseline or whether the expectations should show the loop being unswitched.
| define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 { |
| ; CHECK-LABEL: @test1_single_lane_execution( |
| entry: |
| br label %loop_begin |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label %loop_begin |
| |
; First candidate condition: branch on the argument %cond1.
| loop_begin: |
| br i1 %cond1, label %loop_a, label %loop_b |
| ; CHECK: loop_begin: |
| ; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b |
| |
| loop_a: |
| %unused.a = call i32 @a() |
| br label %latch |
| ; CHECK: loop_a: |
| ; CHECK-NEXT: %unused.a = call i32 @a() |
| ; CHECK-NEXT: br label %latch |
| |
; Second candidate condition: only reached when %cond1 is false.
| loop_b: |
| br i1 %cond2, label %loop_b_a, label %loop_b_b |
| ; CHECK: loop_b: |
| ; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b |
| |
| loop_b_a: |
| %unused.b = call i32 @b() |
| br label %latch |
| ; CHECK: loop_b_a: |
| ; CHECK-NEXT: %unused.b = call i32 @b() |
| ; CHECK-NEXT: br label %latch |
| |
| loop_b_b: |
| %unused.c = call i32 @c() |
| br label %latch |
| ; CHECK: loop_b_b: |
| ; CHECK-NEXT: %unused.c = call i32 @c() |
| ; CHECK-NEXT: br label %latch |
| |
; All three arms rejoin here. The continue flag is loaded from %ptr on every
; iteration; true takes the back edge to loop_begin, false leaves the loop.
| latch: |
| %v = load i1, ptr %ptr |
| br i1 %v, label %loop_begin, label %loop_exit |
| ; CHECK: latch: |
| ; CHECK-NEXT: %v = load i1, ptr %ptr |
| ; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit |
| |
| loop_exit: |
| ret void |
| ; CHECK: loop_exit: |
| ; CHECK-NEXT: ret void |
| } |
| |
; Attribute group referenced as #0 by @test1_single_lane_execution. That
; function's description ties it to execution with a workgroup of size 1.
| attributes #0 = { "amdgpu-flat-work-group-size"="1,1" } |