; test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll - llvm - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

 ; This module creates a divergent branch. The branch is marked as divergent by
 ; the divergence analysis but the condition is not. This test ensures that the
 ; divergence of the branch is tested, not its condition, so that the branch is
 ; correctly emitted as divergent.

 ; Module target triple; the RUN line additionally selects -march=amdgcn -mcpu=gfx900.
 target triple = "amdgcn-mesa-mesa3d"

 ; @main uses the amdgpu_ps calling convention and takes an unnamed i32 (%0) and
 ; an unnamed float (%1). It calls the interp.p1 intrinsic once, runs a counted
 ; loop that has two exits, and exports the resulting value. The assertion lines
 ; directly below the signature are autogenerated and pin the expected output.
 define amdgpu_ps void @main(i32, float) {
 ; CHECK-LABEL: main:
 ; CHECK:       ; %bb.0: ; %start
 ; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
 ; CHECK-NEXT:    s_mov_b32 m0, s0
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
 ; CHECK-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
 ; CHECK-NEXT:    v_cmp_nlt_f32_e64 s[0:1], 0, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    ; implicit-def: $sgpr8_sgpr9
 ; CHECK-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
 ; CHECK-NEXT:    s_branch BB0_3
 ; CHECK-NEXT:  BB0_1: ; %Flow1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], 0
 ; CHECK-NEXT:  BB0_2: ; %Flow
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
 ; CHECK-NEXT:    s_or_b64 s[10:11], s[10:11], s[4:5]
 ; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
 ; CHECK-NEXT:    s_and_b64 s[4:5], s[8:9], exec
 ; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
 ; CHECK-NEXT:    s_mov_b64 s[4:5], s[10:11]
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[10:11]
 ; CHECK-NEXT:    s_cbranch_execz BB0_6
 ; CHECK-NEXT:  BB0_3: ; %loop
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v1
 ; CHECK-NEXT:    s_and_b64 vcc, exec, vcc
 ; CHECK-NEXT:    s_or_b64 s[6:7], s[6:7], exec
 ; CHECK-NEXT:    s_or_b64 s[8:9], s[8:9], exec
 ; CHECK-NEXT:    s_cbranch_vccz BB0_2
 ; CHECK-NEXT:  ; %bb.4: ; %endif1
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    s_mov_b64 s[6:7], -1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[0:1]
 ; CHECK-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
 ; CHECK-NEXT:    ; mask branch BB0_1
 ; CHECK-NEXT:    s_cbranch_execz BB0_1
 ; CHECK-NEXT:  BB0_5: ; %endif2
 ; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
 ; CHECK-NEXT:    s_xor_b64 s[6:7], exec, -1
 ; CHECK-NEXT:    s_branch BB0_1
 ; CHECK-NEXT:  BB0_6: ; %Flow2
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], s[2:3]
 ; CHECK-NEXT:    ; mask branch BB0_8
 ; CHECK-NEXT:  BB0_7: ; %if1
 ; CHECK-NEXT:    v_sqrt_f32_e32 v1, v0
 ; CHECK-NEXT:  BB0_8: ; %endloop
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; CHECK-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
 ; CHECK-NEXT:    s_endpgm
 ; this is the divergent branch with the condition not marked as divergent
 ; NOTE(review): presumably this refers to the `br i1 %v2` in %loop, whose
 ; condition is computed only from the loop counter; confirm.
 start:
   ; %v0 is used by the sqrt in %if1 and by the comparison in %endif1.
   %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
   br label %loop

 loop:
   ; %v1 is the iteration counter: 0 when entered from %start, %v5 (%v1 + 1)
   ; when re-entered from %endif2.
   %v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
   ; First loop exit: go to %if1 once the counter is above 31 (unsigned compare).
   %v2 = icmp ugt i32 %v1, 31
   br i1 %v2, label %if1, label %endif1

 if1:
   %v3 = call float @llvm.sqrt.f32(float %v0)
   br label %endloop

 endif1:
   ; Second loop exit: go straight to %endloop when %v0 > 0.0 (ordered compare),
   ; otherwise continue with the next iteration through %endif2.
   %v4 = fcmp ogt float %v0, 0.000000e+00
   br i1 %v4, label %endloop, label %endif2

 endif2:
   %v5 = add i32 %v1, 1
   br label %loop

 endloop:
   ; %v6 is the sqrt result when arriving from %if1 and 0.0 when arriving
   ; from %endif1.
   %v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
   ; The same value %v6 is passed for all four float operands of the export.
   call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
   ret void
 }

 ; Intrinsics called by @main. The two value-returning intrinsics use attribute
 ; group #1 (nounwind readnone); the export intrinsic uses group #0 (nounwind).
 declare float @llvm.sqrt.f32(float) #1
 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
 declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0

 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s

	; This module creates a divergent branch. The branch is marked as divergent by
	; the divergence analysis but the condition is not. This test ensures that the
	; divergence of the branch is tested, not its condition, so that the branch is
	; correctly emitted as divergent.

	; Module target triple; the RUN line additionally selects -march=amdgcn -mcpu=gfx900.
	target triple = "amdgcn-mesa-mesa3d"

	; @main uses the amdgpu_ps calling convention and takes an unnamed i32 (%0) and
	; an unnamed float (%1). It calls the interp.p1 intrinsic once, runs a counted
	; loop that has two exits, and exports the resulting value. The assertion lines
	; directly below the signature are autogenerated and pin the expected output.
	; NOTE(review): this definition repeats the @main defined earlier in this file
	; with different whitespace; it looks like an extraction duplicate -- confirm
	; which copy should be kept.
	define amdgpu_ps void @main(i32, float) {
	; CHECK-LABEL: main:
	; CHECK: ; %bb.0: ; %start
	; CHECK-NEXT: v_readfirstlane_b32 s0, v0
	; CHECK-NEXT: s_mov_b32 m0, s0
	; CHECK-NEXT: s_mov_b64 s[4:5], 0
	; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
	; CHECK-NEXT: v_cmp_nlt_f32_e64 s[0:1], 0, v0
	; CHECK-NEXT: v_mov_b32_e32 v1, 0
	; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9
	; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
	; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
	; CHECK-NEXT: s_branch BB0_3
	; CHECK-NEXT: BB0_1: ; %Flow1
	; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
	; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
	; CHECK-NEXT: s_mov_b64 s[8:9], 0
	; CHECK-NEXT: BB0_2: ; %Flow
	; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
	; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
	; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5]
	; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
	; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec
	; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
	; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11]
	; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
	; CHECK-NEXT: s_cbranch_execz BB0_6
	; CHECK-NEXT: BB0_3: ; %loop
	; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
	; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
	; CHECK-NEXT: s_and_b64 vcc, exec, vcc
	; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
	; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec
	; CHECK-NEXT: s_cbranch_vccz BB0_2
	; CHECK-NEXT: ; %bb.4: ; %endif1
	; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
	; CHECK-NEXT: s_mov_b64 s[6:7], -1
	; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
	; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
	; CHECK-NEXT: ; mask branch BB0_1
	; CHECK-NEXT: s_cbranch_execz BB0_1
	; CHECK-NEXT: BB0_5: ; %endif2
	; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
	; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
	; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
	; CHECK-NEXT: s_branch BB0_1
	; CHECK-NEXT: BB0_6: ; %Flow2
	; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
	; CHECK-NEXT: v_mov_b32_e32 v1, 0
	; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
	; CHECK-NEXT: ; mask branch BB0_8
	; CHECK-NEXT: BB0_7: ; %if1
	; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
	; CHECK-NEXT: BB0_8: ; %endloop
	; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
	; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
	; CHECK-NEXT: s_endpgm
	; this is the divergent branch with the condition not marked as divergent
	; NOTE(review): presumably this refers to the `br i1 %v2` in %loop, whose
	; condition is computed only from the loop counter; confirm.
	start:
	; %v0 is used by the sqrt in %if1 and by the comparison in %endif1.
	%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
	br label %loop

	loop:
	; %v1 is the iteration counter: 0 when entered from %start, %v5 (%v1 + 1)
	; when re-entered from %endif2.
	%v1 = phi i32 [ 0, %start ], [ %v5, %endif2 ]
	; First loop exit: go to %if1 once the counter is above 31 (unsigned compare).
	%v2 = icmp ugt i32 %v1, 31
	br i1 %v2, label %if1, label %endif1

	if1:
	%v3 = call float @llvm.sqrt.f32(float %v0)
	br label %endloop

	endif1:
	; Second loop exit: go straight to %endloop when %v0 > 0.0 (ordered compare),
	; otherwise continue with the next iteration through %endif2.
	%v4 = fcmp ogt float %v0, 0.000000e+00
	br i1 %v4, label %endloop, label %endif2

	endif2:
	%v5 = add i32 %v1, 1
	br label %loop

	endloop:
	; %v6 is the sqrt result when arriving from %if1 and 0.0 when arriving
	; from %endif1.
	%v6 = phi float [ %v3, %if1 ], [ 0.0, %endif1 ]
	; The same value %v6 is passed for all four float operands of the export.
	call void @llvm.amdgcn.exp.v4f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
	ret void
	}

	; Intrinsics called by @main. The two value-returning intrinsics use attribute
	; group #1 (nounwind readnone); the export intrinsic uses group #0 (nounwind).
	declare float @llvm.sqrt.f32(float) #1
	declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
	declare void @llvm.amdgcn.exp.v4f32(i32, i32, float, float, float, float, i1, i1) #0

	attributes #0 = { nounwind }
	attributes #1 = { nounwind readnone }