; llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll (llvm-project)

 ; The lit command below compiles this file with llc for amdgcn / gfx900,
 ; discards the generated code (-o /dev/null), and asks for an IR dump after
 ; the si-annotate-control-flow pass. 2>&1 routes that dump into the pipe so
 ; FileCheck can match it against the check lines in this file.
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s

 ; "n32" declares 32 bits as the only native integer width for the target.
 target datalayout = "n32"

 ; CHECK-LABEL: @switch_unreachable_default

 ; Kernel under test. Each work-item switches on its x work-item id: ids 0
 ; and 1 select %in0 and %in1 respectively, and every other id is routed to
 ; a default block that is marked unreachable. The selected pointer feeds a
 ; single-block loop that walks bytes until it loads a zero byte, and the
 ; number of completed iterations is stored to %out at the work-item's index.
 ; NOTE(review): attribute group #0 (nounwind readnone) is attached to this
 ; kernel even though the body loads from and stores to memory -- presumably
 ; harmless for this test, but confirm it is intentional.
 define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
 centry:
   ; The switch key is the per-work-item id, so the chosen successor can
   ; differ between work-items.
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   switch i32 %tid, label %sw.default [
     i32 0, label %sw.bb0
     i32 1, label %sw.bb1
   ]

 sw.bb0:
   br label %sw.epilog

 sw.bb1:
   br label %sw.epilog

 ; Default destination of the switch; it never branches to %sw.epilog, so
 ; the phi there has only two incoming edges.
 sw.default:
   unreachable

 sw.epilog:
   ; %ptr is %in0 when arriving from the case-0 block and %in1 when arriving
   ; from the case-1 block.
   %ptr = phi ptr addrspace(1) [%in0, %sw.bb0], [%in1, %sw.bb1]
   br label %sw.while

 ; The loop below is necessary to preserve the effect of the
 ; unreachable default on divergence analysis in the presence of other
 ; optimizations. The loop consists of a single block where the loop
 ; exit is divergent because it depends on the divergent phi at the
 ; start of the block. The checks below ensure that the loop exit is
 ; handled correctly as divergent. But the data-flow within the block
 ; is sensitive to optimizations; so we just ensure that the relevant
 ; operations in the block body are indeed in the same block.

 ; Expected shape of the dumped IR: an i64 phi, then the i8 load and the
 ; icmp with no " br " text in between; the icmp result and the phi are the
 ; operands of llvm.amdgcn.if.break.i64, its result is the operand of
 ; llvm.amdgcn.loop.i64, and the conditional branch uses that final result.
 ; CHECK: [[PHI:%[a-zA-Z0-9._]+]]  = phi i64
 ; CHECK-NOT: {{ br }}
 ; CHECK: load i8
 ; CHECK-NOT: {{ br }}
 ; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
 ; CHECK: [[IF:%[a-zA-Z0-9._]+]]   = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
 ; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
 ; CHECK: br i1 [[LOOP]]

 sw.while:
   ; %p is the current byte address (starts at %ptr, advances by one byte per
   ; iteration); %count is the number of iterations completed so far.
   %p = phi ptr addrspace(1) [ %ptr, %sw.epilog ], [ %incdec.ptr, %sw.while ]
   %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
   %char = load i8, ptr addrspace(1) %p, align 1
   ; Leave the loop once the loaded byte is zero; otherwise repeat this block.
   %tobool = icmp eq i8 %char, 0
   %incdec.ptr = getelementptr inbounds i8, ptr addrspace(1) %p, i64 1
   %count.inc = add i32 %count, 1
   br i1 %tobool, label %sw.exit, label %sw.while

 sw.exit:
   ; Write the final %count to the i32 slot of %out indexed by the
   ; zero-extended work-item id.
   %tid64 = zext i32 %tid to i64
   %gep_out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid64
   store i32 %count, ptr addrspace(1) %gep_out, align 4
   ret void
 }

 ; Intrinsic that returns the calling work-item's id in the x dimension.
 declare i32 @llvm.amdgcn.workitem.id.x() #0

 ; #0 is used by both the kernel definition and the intrinsic declaration.
 attributes #0 = { nounwind readnone }
 ; NOTE(review): #1 is not referenced anywhere in the visible part of this
 ; file -- confirm whether it is still needed.
 attributes #1 = { convergent noinline optnone }
	; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s

	; "n32" declares 32 bits as the only native integer width for the target.
	; NOTE(review): the same datalayout line already appears earlier in this
	; file -- confirm whether this second copy is intended.
	target datalayout = "n32"

	; CHECK-LABEL: @switch_unreachable_default

	; NOTE(review): this defines @switch_unreachable_default a second time; an
	; identical definition appears earlier in this file. The IR parser rejects
	; a redefinition of the same function within one module, so this copy
	; looks like an extraction/paste duplicate -- confirm and drop one of them.
	;
	; Kernel under test. Each work-item switches on its x work-item id: ids 0
	; and 1 select %in0 and %in1 respectively, and every other id is routed to
	; a default block that is marked unreachable. The selected pointer feeds a
	; single-block loop that walks bytes until it loads a zero byte, and the
	; number of completed iterations is stored to %out at the work-item's index.
	define amdgpu_kernel void @switch_unreachable_default(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
	centry:
	; The switch key is the per-work-item id, so the chosen successor can
	; differ between work-items.
	%tid = call i32 @llvm.amdgcn.workitem.id.x()
	switch i32 %tid, label %sw.default [
	i32 0, label %sw.bb0
	i32 1, label %sw.bb1
	]

	sw.bb0:
	br label %sw.epilog

	sw.bb1:
	br label %sw.epilog

	; Default destination of the switch; it never branches to %sw.epilog, so
	; the phi there has only two incoming edges.
	sw.default:
	unreachable

	sw.epilog:
	; %ptr is %in0 when arriving from the case-0 block and %in1 when arriving
	; from the case-1 block.
	%ptr = phi ptr addrspace(1) [%in0, %sw.bb0], [%in1, %sw.bb1]
	br label %sw.while

	; The loop below is necessary to preserve the effect of the
	; unreachable default on divergence analysis in the presence of other
	; optimizations. The loop consists of a single block where the loop
	; exit is divergent because it depends on the divergent phi at the
	; start of the block. The checks below ensure that the loop exit is
	; handled correctly as divergent. But the data-flow within the block
	; is sensitive to optimizations; so we just ensure that the relevant
	; operations in the block body are indeed in the same block.

	; Expected shape of the dumped IR: an i64 phi, then the i8 load and the
	; icmp with no " br " text in between; the icmp result and the phi are the
	; operands of llvm.amdgcn.if.break.i64, its result is the operand of
	; llvm.amdgcn.loop.i64, and the conditional branch uses that final result.
	; CHECK: [[PHI:%[a-zA-Z0-9._]+]] = phi i64
	; CHECK-NOT: {{ br }}
	; CHECK: load i8
	; CHECK-NOT: {{ br }}
	; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
	; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
	; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
	; CHECK: br i1 [[LOOP]]

	sw.while:
	; %p is the current byte address (starts at %ptr, advances by one byte per
	; iteration); %count is the number of iterations completed so far.
	%p = phi ptr addrspace(1) [ %ptr, %sw.epilog ], [ %incdec.ptr, %sw.while ]
	%count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
	%char = load i8, ptr addrspace(1) %p, align 1
	; Leave the loop once the loaded byte is zero; otherwise repeat this block.
	%tobool = icmp eq i8 %char, 0
	%incdec.ptr = getelementptr inbounds i8, ptr addrspace(1) %p, i64 1
	%count.inc = add i32 %count, 1
	br i1 %tobool, label %sw.exit, label %sw.while

	sw.exit:
	; Write the final %count to the i32 slot of %out indexed by the
	; zero-extended work-item id.
	%tid64 = zext i32 %tid to i64
	%gep_out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid64
	store i32 %count, ptr addrspace(1) %gep_out, align 4
	ret void
	}

	; Intrinsic that returns the calling work-item's id in the x dimension.
	declare i32 @llvm.amdgcn.workitem.id.x() #0

	; #0 is used by both the kernel definition and the intrinsic declaration.
	; NOTE(review): both attribute groups below repeat definitions that already
	; appear earlier in this file -- confirm whether this copy is intended.
	attributes #0 = { nounwind readnone }
	; NOTE(review): #1 is not referenced anywhere in the visible part of this
	; file -- confirm whether it is still needed.
	attributes #1 = { convergent noinline optnone }