blob: 2fde3e3759f47a89ad6fb8b7c1b46916c17dc046 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,instcombine,early-cse,simplifycfg -S < %s | FileCheck %s -check-prefix=COMB-CHECK
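; Negative test: the readfirstlane call in %X must not be folded away. Its
; operand is computed inside a loop that is left through a divergent branch
; (temporal divergence), so it cannot be treated as uniform at its use.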
define amdgpu_cs void @temporal_divergence(ptr addrspace(1) %out, i32 %n) {
; PASS-CHECK-LABEL: define amdgpu_cs void @temporal_divergence(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; PASS-CHECK-NEXT: br label %[[H:.*]]
; PASS-CHECK: [[H]]:
; PASS-CHECK-NEXT: [[UNI_MERGE_H:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[UNI_INC:%.*]], %[[H]] ]
; PASS-CHECK-NEXT: [[UNI_INC]] = add i32 [[UNI_MERGE_H]], 1
; PASS-CHECK-NEXT: [[DIV_EXITX:%.*]] = icmp eq i32 [[TID]], 0
; PASS-CHECK-NEXT: br i1 [[DIV_EXITX]], label %[[X:.*]], label %[[H]]
; PASS-CHECK: [[X]]:
; PASS-CHECK-NEXT: [[UNI_JOIN:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[UNI_INC]])
; PASS-CHECK-NEXT: [[JOIN_USER:%.*]] = add i32 [[UNI_JOIN]], 5
; PASS-CHECK-NEXT: store i32 [[JOIN_USER]], ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: ret void
;
; COMB-CHECK-LABEL: define amdgpu_cs void @temporal_divergence(
; COMB-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; COMB-CHECK-NEXT: [[ENTRY:.*]]:
; COMB-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; COMB-CHECK-NEXT: br label %[[H:.*]]
; COMB-CHECK: [[H]]:
; COMB-CHECK-NEXT: [[UNI_MERGE_H:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[UNI_INC:%.*]], %[[H]] ]
; COMB-CHECK-NEXT: [[UNI_INC]] = add i32 [[UNI_MERGE_H]], 1
; COMB-CHECK-NEXT: [[DIV_EXITX:%.*]] = icmp eq i32 [[TID]], 0
; COMB-CHECK-NEXT: br i1 [[DIV_EXITX]], label %[[X:.*]], label %[[H]]
; COMB-CHECK: [[X]]:
; COMB-CHECK-NEXT: [[UNI_JOIN:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[UNI_INC]])
; COMB-CHECK-NEXT: [[JOIN_USER:%.*]] = add i32 [[UNI_JOIN]], 5
; COMB-CHECK-NEXT: store i32 [[JOIN_USER]], ptr addrspace(1) [[OUT]], align 4
; COMB-CHECK-NEXT: ret void
;
; A loop-carried counter is read through readfirstlane after a loop whose exit
; condition depends on the workitem id. Both RUN pipelines are expected to
; leave the readfirstlane call in place (see the check lines above).
; %n is not referenced anywhere in the body.
entry:
; Workitem id in x; it feeds the loop-exit compare in %H.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
br label %H
H:
; Counter: 0 when entered from %entry, previous value + 1 on each back edge.
%uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ]
%uni.inc = add i32 %uni.merge.h, 1
; Leave the loop for %X when the workitem id is 0, otherwise iterate again.
%div.exitx = icmp eq i32 %tid, 0
br i1 %div.exitx, label %X, label %H ; divergent branch
X:
; The counter is consumed outside the loop, through readfirstlane; the result
; plus 5 is stored to %out.
%uni.join = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %uni.inc)
%join.user = add i32 %uni.join, 5
store i32 %join.user, ptr addrspace(1) %out
ret void
}
; AMDGPU intrinsics called by @temporal_divergence.
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.readfirstlane.i32(i32)