| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK |
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,instcombine,early-cse,simplifycfg -S < %s | FileCheck %s -check-prefix=COMB-CHECK |
| |
| ; Negative test (temporal divergence): this should not be optimized. The readfirstlane in %X reads %uni.inc, which is defined inside loop %H, and the loop's exit branch depends on the workitem id, so both RUN lines expect the call to remain. |
| define amdgpu_cs void @temporal_divergence(ptr addrspace(1) %out, i32 %n) { |
| ; PASS-CHECK-LABEL: define amdgpu_cs void @temporal_divergence( |
| ; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; PASS-CHECK-NEXT: [[ENTRY:.*]]: |
| ; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; PASS-CHECK-NEXT: br label %[[H:.*]] |
| ; PASS-CHECK: [[H]]: |
| ; PASS-CHECK-NEXT: [[UNI_MERGE_H:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[UNI_INC:%.*]], %[[H]] ] |
| ; PASS-CHECK-NEXT: [[UNI_INC]] = add i32 [[UNI_MERGE_H]], 1 |
| ; PASS-CHECK-NEXT: [[DIV_EXITX:%.*]] = icmp eq i32 [[TID]], 0 |
| ; PASS-CHECK-NEXT: br i1 [[DIV_EXITX]], label %[[X:.*]], label %[[H]] |
| ; PASS-CHECK: [[X]]: |
| ; PASS-CHECK-NEXT: [[UNI_JOIN:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[UNI_INC]]) |
| ; PASS-CHECK-NEXT: [[JOIN_USER:%.*]] = add i32 [[UNI_JOIN]], 5 |
| ; PASS-CHECK-NEXT: store i32 [[JOIN_USER]], ptr addrspace(1) [[OUT]], align 4 |
| ; PASS-CHECK-NEXT: ret void |
| ; |
| ; COMB-CHECK-LABEL: define amdgpu_cs void @temporal_divergence( |
| ; COMB-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; COMB-CHECK-NEXT: [[ENTRY:.*]]: |
| ; COMB-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; COMB-CHECK-NEXT: br label %[[H:.*]] |
| ; COMB-CHECK: [[H]]: |
| ; COMB-CHECK-NEXT: [[UNI_MERGE_H:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[UNI_INC:%.*]], %[[H]] ] |
| ; COMB-CHECK-NEXT: [[UNI_INC]] = add i32 [[UNI_MERGE_H]], 1 |
| ; COMB-CHECK-NEXT: [[DIV_EXITX:%.*]] = icmp eq i32 [[TID]], 0 |
| ; COMB-CHECK-NEXT: br i1 [[DIV_EXITX]], label %[[X:.*]], label %[[H]] |
| ; COMB-CHECK: [[X]]: |
| ; COMB-CHECK-NEXT: [[UNI_JOIN:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[UNI_INC]]) |
| ; COMB-CHECK-NEXT: [[JOIN_USER:%.*]] = add i32 [[UNI_JOIN]], 5 |
| ; COMB-CHECK-NEXT: store i32 [[JOIN_USER]], ptr addrspace(1) [[OUT]], align 4 |
| ; COMB-CHECK-NEXT: ret void |
| ; |
| entry: |
| %tid = call i32 @llvm.amdgcn.workitem.id.x() |
| br label %H |
| |
| H: |
| %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ] |
| %uni.inc = add i32 %uni.merge.h, 1 |
| %div.exitx = icmp eq i32 %tid, 0 |
| br i1 %div.exitx, label %X, label %H ; divergent branch: the exit condition compares the workitem id (%tid) against 0 |
| |
| X: |
| %uni.join = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %uni.inc) |
| %join.user = add i32 %uni.join, 5 |
| store i32 %join.user, ptr addrspace(1) %out |
| ret void |
| } |
| |
| declare i32 @llvm.amdgcn.workitem.id.x() |
| declare i32 @llvm.amdgcn.readfirstlane.i32(i32) |