| ; RUN: opt %s -analyze -divergence -use-gpu-divergence-analysis | FileCheck %s |
| |
| target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) |
| define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { |
| ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'no_diverge' |
| entry: |
| %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() |
| %cond = icmp slt i32 %n, 0 |
| br i1 %cond, label %then, label %else ; uniform |
| ; CHECK-NOT: DIVERGENT: br i1 %cond, |
| then: |
| %a1 = add i32 %a, %tid |
| br label %merge |
| else: |
| %b2 = add i32 %b, %tid |
| br label %merge |
| merge: |
| %c = phi i32 [ %a1, %then ], [ %b2, %else ] |
| ret i32 %c |
| } |
| |
| ; c = a; |
| ; if (threadIdx.x < 5) // divergent: data dependent |
| ; c = b; |
| ; return c; // c is divergent: sync dependent |
| define i32 @sync(i32 %a, i32 %b) { |
| ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'sync' |
| bb1: |
| %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() |
| %cond = icmp slt i32 %tid, 5 |
| br i1 %cond, label %bb2, label %bb3 |
| ; CHECK: DIVERGENT: br i1 %cond, |
| bb2: |
| br label %bb3 |
| bb3: |
| %c = phi i32 [ %a, %bb1 ], [ %b, %bb2 ] ; sync dependent on tid |
| ; CHECK: DIVERGENT: %c = |
| ret i32 %c |
| } |
| |
| ; c = 0; |
| ; if (threadIdx.x >= 5) { // divergent |
| ; c = (n < 0 ? a : b); // c here is uniform because n is uniform |
| ; } |
| ; // c here is divergent because it is sync dependent on threadIdx.x >= 5 |
| ; return c; |
| define i32 @mixed(i32 %n, i32 %a, i32 %b) { |
| ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'mixed' |
| bb1: |
| %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() |
| %cond = icmp slt i32 %tid, 5 |
| br i1 %cond, label %bb6, label %bb2 |
| ; CHECK: DIVERGENT: br i1 %cond, |
| bb2: |
| %cond2 = icmp slt i32 %n, 0 |
| br i1 %cond2, label %bb4, label %bb3 |
| bb3: |
| br label %bb5 |
| bb4: |
| br label %bb5 |
| bb5: |
| %c = phi i32 [ %a, %bb3 ], [ %b, %bb4 ] |
| ; CHECK-NOT: DIVERGENT: %c = |
| br label %bb6 |
| bb6: |
| %c2 = phi i32 [ 0, %bb1], [ %c, %bb5 ] |
| ; CHECK: DIVERGENT: %c2 = |
| ret i32 %c2 |
| } |
| |
| ; We conservatively treats all parameters of a __device__ function as divergent. |
| define i32 @device(i32 %n, i32 %a, i32 %b) { |
| ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'device' |
| ; CHECK: DIVERGENT: i32 %n |
| ; CHECK: DIVERGENT: i32 %a |
| ; CHECK: DIVERGENT: i32 %b |
| entry: |
| %cond = icmp slt i32 %n, 0 |
| br i1 %cond, label %then, label %else |
| ; CHECK: DIVERGENT: br i1 %cond, |
| then: |
| br label %merge |
| else: |
| br label %merge |
| merge: |
| %c = phi i32 [ %a, %then ], [ %b, %else ] |
| ret i32 %c |
| } |
| |
| ; int i = 0; |
| ; do { |
| ; i++; // i here is uniform |
| ; } while (i < laneid); |
| ; return i == 10 ? 0 : 1; // i here is divergent |
| ; |
| ; The i defined in the loop is used outside. |
| define i32 @loop() { |
| ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'loop' |
| entry: |
| %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() |
| br label %loop |
| loop: |
| %i = phi i32 [ 0, %entry ], [ %i1, %loop ] |
| ; CHECK-NOT: DIVERGENT: %i = |
| %i1 = add i32 %i, 1 |
| %exit_cond = icmp sge i32 %i1, %laneid |
| br i1 %exit_cond, label %loop_exit, label %loop |
| loop_exit: |
| %cond = icmp eq i32 %i, 10 |
| br i1 %cond, label %then, label %else |
| ; CHECK: DIVERGENT: br i1 %cond, |
| then: |
| ret i32 0 |
| else: |
| ret i32 1 |
| } |
| |
| ; Same as @loop, but the loop is in the LCSSA form. |
| define i32 @lcssa() { |
| ; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'lcssa' |
| entry: |
| %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() |
| br label %loop |
| loop: |
| %i = phi i32 [ 0, %entry ], [ %i1, %loop ] |
| ; CHECK-NOT: DIVERGENT: %i = |
| %i1 = add i32 %i, 1 |
| %exit_cond = icmp sge i32 %i1, %tid |
| br i1 %exit_cond, label %loop_exit, label %loop |
| loop_exit: |
| %i.lcssa = phi i32 [ %i, %loop ] |
| ; CHECK: DIVERGENT: %i.lcssa = |
| %cond = icmp eq i32 %i.lcssa, 10 |
| br i1 %cond, label %then, label %else |
| ; CHECK: DIVERGENT: br i1 %cond, |
| then: |
| ret i32 0 |
| else: |
| ret i32 1 |
| } |
| |
| ; Verifies sync-dependence is computed correctly in the absense of loops. |
| define i32 @sync_no_loop(i32 %arg) { |
| entry: |
| %0 = add i32 %arg, 1 |
| %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() |
| %1 = icmp sge i32 %tid, 10 |
| br i1 %1, label %bb1, label %bb2 |
| |
| bb1: |
| br label %bb3 |
| |
| bb2: |
| br label %bb3 |
| |
| bb3: |
| %2 = add i32 %0, 2 |
| ; CHECK-NOT: DIVERGENT: %2 |
| ret i32 %2 |
| } |
| |
| declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() |
| declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() |
| declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() |
| declare i32 @llvm.nvvm.read.ptx.sreg.laneid() |
| |
| !nvvm.annotations = !{!0, !1, !2, !3, !4} |
| !0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1} |
| !1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1} |
| !2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1} |
| !3 = !{i32 ()* @loop, !"kernel", i32 1} |
| !4 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1} |