| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY |
| ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s |
| |
| declare void @llvm.trap() |
| declare i32 @llvm.amdgcn.workitem.id.x() |
| |
| ; @kernel: regression test for a kernel whose CFG has more than one exit block. |
| ; The exits are two trap+unreachable blocks (cond.false, cond.false.i8) and one |
| ; returning block (if.end6). The first branch tests the kernel argument %n; the |
| ; branch in if.else tests the work-item id, so it can differ between lanes. |
| ; NOTE(review): the assertions with the UNIFY prefix still show `unreachable` in |
| ; both trap blocks, i.e. the IR exactly as written below. The corresponding RUN |
| ; line passes -stop-after to opt; confirm that this really runs the unify pass. |
| define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { |
| ; This used to bypass the structurization process because the structurizer is |
| ; unable to handle a CFG with multiple exits. It should now be structurized correctly. |
| ; CHECK-LABEL: kernel: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x10 |
| ; CHECK-NEXT: s_load_dword s10, s[8:9], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_cmpk_lg_i32 s0, 0x100 |
| ; CHECK-NEXT: s_cbranch_scc0 .LBB0_6 |
| ; CHECK-NEXT: ; %bb.1: ; %if.else |
| ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0 |
| ; CHECK-NEXT: s_mov_b64 s[4:5], 0 |
| ; CHECK-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-NEXT: s_mov_b64 s[0:1], 0 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_5 |
| ; CHECK-NEXT: ; %bb.2: ; %if.then3 |
| ; CHECK-NEXT: s_cmp_lg_u32 s10, 0 |
| ; CHECK-NEXT: s_cbranch_scc1 .LBB0_14 |
| ; CHECK-NEXT: ; %bb.3: |
| ; CHECK-NEXT: s_mov_b64 s[0:1], -1 |
| ; CHECK-NEXT: .LBB0_4: ; %Flow3 |
| ; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec |
| ; CHECK-NEXT: s_and_b64 s[2:3], s[2:3], exec |
| ; CHECK-NEXT: .LBB0_5: ; %Flow2 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] |
| ; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5] |
| ; CHECK-NEXT: s_cbranch_vccz .LBB0_8 |
| ; CHECK-NEXT: s_branch .LBB0_7 |
| ; CHECK-NEXT: .LBB0_6: |
| ; CHECK-NEXT: s_mov_b64 s[2:3], 0 |
| ; CHECK-NEXT: s_mov_b64 s[0:1], 0 |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_8 |
| ; CHECK-NEXT: .LBB0_7: ; %if.then |
| ; CHECK-NEXT: s_cmp_lg_u32 s10, 0 |
| ; CHECK-NEXT: s_mov_b64 s[0:1], -1 |
| ; CHECK-NEXT: s_cbranch_scc1 .LBB0_13 |
| ; CHECK-NEXT: .LBB0_8: ; %Flow4 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[2:3] |
| ; CHECK-NEXT: .LBB0_9: ; %UnifiedUnreachableBlock |
| ; CHECK-NEXT: ; divergent unreachable |
| ; CHECK-NEXT: .LBB0_10: ; %Flow6 |
| ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[0:1] |
| ; CHECK-NEXT: s_cbranch_execz .LBB0_12 |
| ; CHECK-NEXT: ; %bb.11: ; %if.end6.sink.split |
| ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s10 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dword v0, v1, s[0:1] |
| ; CHECK-NEXT: .LBB0_12: ; %UnifiedReturnBlock |
| ; CHECK-NEXT: s_endpgm |
| ; CHECK-NEXT: .LBB0_13: ; %cond.false |
| ; CHECK-NEXT: s_mov_b64 s[0:1], 0 |
| ; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], exec |
| ; CHECK-NEXT: s_trap 2 |
| ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[2:3] |
| ; CHECK-NEXT: s_cbranch_execnz .LBB0_9 |
| ; CHECK-NEXT: s_branch .LBB0_10 |
| ; CHECK-NEXT: .LBB0_14: ; %cond.false.i8 |
| ; CHECK-NEXT: s_mov_b64 s[2:3], -1 |
| ; CHECK-NEXT: s_trap 2 |
| ; CHECK-NEXT: s_branch .LBB0_4 |
| ; UNIFY-LABEL: @kernel( |
| ; UNIFY-NEXT: entry: |
| ; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 |
| ; UNIFY-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] |
| ; UNIFY: if.then: |
| ; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 |
| ; UNIFY-NEXT: br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]] |
| ; UNIFY: cond.false: |
| ; UNIFY-NEXT: call void @llvm.trap() |
| ; UNIFY-NEXT: unreachable |
| ; UNIFY: if.else: |
| ; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 |
| ; UNIFY-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]] |
| ; UNIFY: if.then3: |
| ; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 |
| ; UNIFY-NEXT: br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]] |
| ; UNIFY: cond.false.i8: |
| ; UNIFY-NEXT: call void @llvm.trap() |
| ; UNIFY-NEXT: unreachable |
| ; UNIFY: if.end6.sink.split: |
| ; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] |
| ; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4 |
| ; UNIFY-NEXT: br label [[IF_END6]] |
| ; UNIFY: if.end6: |
| ; UNIFY-NEXT: ret void |
| ; |
| entry: |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| %cmp = icmp eq i32 %n, 256 |
| br i1 %cmp, label %if.then, label %if.else |
| |
| if.then: |
| %cmp1 = icmp eq i32 %a, 0 |
| br i1 %cmp1, label %if.end6.sink.split, label %cond.false |
| |
| ; First trapping exit: reached from if.then when %a is non-zero. |
| cond.false: |
| call void @llvm.trap() |
| unreachable |
| |
| if.else: |
| %cmp2 = icmp ult i32 %tid, 10 |
| br i1 %cmp2, label %if.then3, label %if.end6 |
| |
| if.then3: |
| %cmp1.i7 = icmp eq i32 %a, 0 |
| br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8 |
| |
| ; Second trapping exit: reached from if.then3 when %a is non-zero. |
| cond.false.i8: |
| call void @llvm.trap() |
| unreachable |
| |
| ; Shared store block: writes %a to element %tid of %x, then falls into the return. |
| if.end6.sink.split: |
| %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid |
| store i32 %a, ptr addrspace(1) %x1, align 4 |
| br label %if.end6 |
| |
| ; Only returning exit; also reached directly from if.else when %tid is not below 10 (unsigned). |
| if.end6: |
| ret void |
| } |
| |
| ; @kernel_callbr: same blocks and exits as @kernel, but every conditional branch |
| ; is replaced by a callbr of an empty inline asm that takes the zero-extended |
| ; compare result as its "r" input, and the final jump to if.end6 is a callbr |
| ; with an empty indirect-target list. |
| define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { |
| ; CHECK-LABEL: kernel_callbr: |
| ; CHECK: ; %bb.0: ; %entry |
| ; CHECK-NEXT: s_load_dword s1, s[8:9], 0x10 |
| ; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: s_cmpk_eq_i32 s1, 0x100 |
| ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] |
| ; CHECK-NEXT: ;;#ASMSTART |
| ; CHECK-NEXT: ;;#ASMEND |
| ; CHECK-NEXT: ; %bb.1: ; %if.then |
| ; CHECK-NEXT: s_cmp_eq_u32 s0, 0 |
| ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] |
| ; CHECK-NEXT: ;;#ASMSTART |
| ; CHECK-NEXT: ;;#ASMEND |
| ; CHECK-NEXT: .LBB1_2: ; %if.end6.sink.split |
| ; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8 |
| ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| ; CHECK-NEXT: v_mov_b32_e32 v1, s0 |
| ; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| ; CHECK-NEXT: global_store_dword v0, v1, s[2:3] |
| ; CHECK-NEXT: ;;#ASMSTART |
| ; CHECK-NEXT: ;;#ASMEND |
| ; CHECK-NEXT: .LBB1_3: ; Inline asm indirect target |
| ; CHECK-NEXT: ; %UnifiedReturnBlock |
| ; CHECK-NEXT: ; Label of block must be emitted |
| ; CHECK-NEXT: s_endpgm |
| ; CHECK-NEXT: .LBB1_4: ; Inline asm indirect target |
| ; CHECK-NEXT: ; %if.else |
| ; CHECK-NEXT: ; Label of block must be emitted |
| ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc |
| ; CHECK-NEXT: ;;#ASMSTART |
| ; CHECK-NEXT: ;;#ASMEND |
| ; CHECK-NEXT: ; %bb.5: ; %if.then3 |
| ; CHECK-NEXT: s_cmp_eq_u32 s0, 0 |
| ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 |
| ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] |
| ; CHECK-NEXT: ;;#ASMSTART |
| ; CHECK-NEXT: ;;#ASMEND |
| ; CHECK-NEXT: s_branch .LBB1_2 |
| ; CHECK-NEXT: .LBB1_6: ; Inline asm indirect target |
| ; CHECK-NEXT: ; %cond.false.i8 |
| ; CHECK-NEXT: ; Label of block must be emitted |
| ; CHECK-NEXT: .LBB1_7: ; Inline asm indirect target |
| ; CHECK-NEXT: ; %cond.false |
| ; CHECK-NEXT: ; Label of block must be emitted |
| ; CHECK-NEXT: s_trap 2 |
| ; CHECK-NEXT: ; divergent unreachable |
| ; CHECK-NEXT: s_branch .LBB1_3 |
| ; UNIFY-LABEL: @kernel_callbr( |
| ; UNIFY-NEXT: entry: |
| ; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 |
| ; UNIFY-NEXT: [[CMP32:%.*]] = zext i1 [[CMP]] to i32 |
| ; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP32]]) |
| ; UNIFY-NEXT: to label [[IF_THEN:%.*]] [label %if.else] |
| ; UNIFY: if.then: |
| ; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 |
| ; UNIFY-NEXT: [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32 |
| ; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_32]]) |
| ; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT:%.*]] [label %cond.false] |
| ; UNIFY: cond.false: |
| ; UNIFY-NEXT: call void @llvm.trap() |
| ; UNIFY-NEXT: unreachable |
| ; UNIFY: if.else: |
| ; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 |
| ; UNIFY-NEXT: [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32 |
| ; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP2_32]]) |
| ; UNIFY-NEXT: to label [[IF_THEN3:%.*]] [label %if.end6] |
| ; UNIFY: if.then3: |
| ; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 |
| ; UNIFY-NEXT: [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32 |
| ; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]]) |
| ; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8] |
| ; UNIFY: cond.false.i8: |
| ; UNIFY-NEXT: call void @llvm.trap() |
| ; UNIFY-NEXT: unreachable |
| ; UNIFY: if.end6.sink.split: |
| ; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] |
| ; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4 |
| ; UNIFY-NEXT: callbr void asm "", ""() |
| ; UNIFY-NEXT: to label [[IF_END6:%.*]] [] |
| ; UNIFY: if.end6: |
| ; UNIFY-NEXT: ret void |
| ; |
| entry: |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| %cmp = icmp eq i32 %n, 256 |
| %cmp32 = zext i1 %cmp to i32 |
| callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else] |
| |
| if.then: |
| %cmp1 = icmp eq i32 %a, 0 |
| %cmp1_32 = zext i1 %cmp1 to i32 |
| callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false] |
| |
| ; First trapping exit: the indirect target of the callbr in if.then. |
| cond.false: |
| call void @llvm.trap() |
| unreachable |
| |
| ; Indirect target of the callbr in entry. |
| if.else: |
| %cmp2 = icmp ult i32 %tid, 10 |
| %cmp2_32 = zext i1 %cmp2 to i32 |
| callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6] |
| |
| if.then3: |
| %cmp1.i7 = icmp eq i32 %a, 0 |
| %cmp1.i7_32 = zext i1 %cmp1.i7 to i32 |
| callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8] |
| |
| ; Second trapping exit: the indirect target of the callbr in if.then3. |
| cond.false.i8: |
| call void @llvm.trap() |
| unreachable |
| |
| ; Shared store block: writes %a to element %tid of %x; its callbr has only a fallthrough label. |
| if.end6.sink.split: |
| %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid |
| store i32 %a, ptr addrspace(1) %x1, align 4 |
| callbr void asm "", ""() to label %if.end6 [] |
| |
| ; Only returning exit; also the indirect target of the callbr in if.else. |
| if.end6: |
| ret void |
| } |