| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck %s |
| |
| ; Check that the copy from s[2:3] to v[0:1] occurs inside the loop, not after it. |
| |
| ; Returns the value %i had on the iteration in which the loop exit was taken: |
| ; 0 if the exit happens on the first iteration (%arg == 0), otherwise 1. |
| ; NOTE(review): %arg appears to arrive in v0 (see the first v_add_u32 in the |
| ; checks), so the exit condition is evaluated per lane and lanes may leave |
| ; the loop on different iterations. The checks expect v[0:1] to be written |
| ; from s[2:3] inside the loop, before s[2:3] is overwritten with 1. |
| define i64 @test_temporal_divergence(i32 %arg) #0 { |
| ; CHECK-LABEL: test_temporal_divergence: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: v_add_u32_e32 v2, 1, v0 |
| ; CHECK-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-NEXT: s_mov_b64 s[0:1], 0 |
| ; CHECK-NEXT: .LBB0_1: ; %loop |
| ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: v_add_u32_e32 v2, -1, v2 |
| ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 |
| ; CHECK-NEXT: v_mov_b64_e32 v[0:1], s[2:3] |
| ; CHECK-NEXT: s_or_b64 s[0:1], vcc, s[0:1] |
| ; CHECK-NEXT: s_mov_b64 s[2:3], 1 |
| ; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1] |
| ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 |
| ; CHECK-NEXT: ; %bb.2: ; %end |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| br label %loop |
| |
| loop: |
| ; %i is 0 on the first pass (coming from %entry) and 1 on every later pass. |
| %i = phi i64 [ 1, %loop ], [ 0, %entry ] |
| ; %count starts at 0 and is incremented by one on each pass through the loop. |
| %count = phi i32 [ %inc, %loop ], [ 0, %entry ] |
| %inc = add i32 %count, 1 |
| ; Leave the loop on the pass where %count equals %arg. |
| %cond = icmp eq i32 %count, %arg |
| br i1 %cond, label %end, label %loop |
| |
| end: |
| ; %i is defined inside the loop but used here, after the loop has exited. |
| ret i64 %i |
| } |