| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals |
| ; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU |
| |
| target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" |
| target triple = "amdgcn-amd-amdhsa" |
| |
| ; Kernel environment record: one configuration struct followed by two pointers. |
| %struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy.8, ptr, ptr } |
| ; Configuration struct with three i8 fields; @__kmpc_target_init in this file |
| ; reads the field at index 2. |
| %struct.ConfigurationEnvironmentTy.8 = type { i8, i8, i8 } |
| |
| ; Internal i32 flag placed in address space 3 with an undef initializer. It is |
| ; written and read back only inside @__kmpc_target_init. |
| ; NOTE(review): address space 3 is presumably LDS/local memory per the AMDGPU |
| ; address-space numbering -- confirm against the backend documentation. |
| @IsSPMDMode = internal addrspace(3) global i32 undef |
| ; Kernel environment constant in address space 1: configuration { 1, 0, 1 } and |
| ; two null pointers (addrspace(1) null cast to the generic address space). |
| ; The global check lines in this file expect the pass to rewrite the first i8 |
| ; from 1 to 0 and to leave the remaining fields unchanged. |
| @__omp_offloading_10302_b20a40e_main_l4_kernel_environment = addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy.8 { i8 1, i8 0, i8 1 }, ptr addrspacecast (ptr addrspace(1) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr) } |
| |
| ;. |
| ; AMDGPU: @[[ISSPMDMODE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef |
| ; AMDGPU: @[[__OMP_OFFLOADING_10302_B20A40E_MAIN_L4_KERNEL_ENVIRONMENT:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) constant [[STRUCT_KERNELENVIRONMENTTY:%.*]] { [[STRUCT_CONFIGURATIONENVIRONMENTTY_8:%.*]] { i8 0, i8 0, i8 1 }, ptr addrspacecast (ptr addrspace(1) null to ptr), ptr addrspacecast (ptr addrspace(1) null to ptr) } |
| ;. |
| ; Function defined in this module that executes an acquire fence and returns 0. |
| ; It is called from the kernel between the init and deinit runtime calls. The |
| ; check lines below expect its body to be unchanged by the pass, with only an |
| ; attribute group attached to the definition. |
| define i32 @fputs() { |
| ; AMDGPU-LABEL: define {{[^@]+}}@fputs |
| ; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { |
| ; AMDGPU-NEXT: fence acquire |
| ; AMDGPU-NEXT: ret i32 0 |
| ; |
| fence acquire |
| ret i32 0 |
| } |
| |
| ; Internal stand-in for the runtime init entry point, reduced for this test. |
| ; %0 is treated as a pointer to a %struct.ConfigurationEnvironmentTy.8; the only |
| ; call site in this file passes the kernel environment global cast to the |
| ; generic address space. |
| ; |
| ; What the check lines below expect after the pass has run: |
| ;  - the field load through %0 becomes a load from a constant GEP (byte offset 2) |
| ;    into the kernel environment global; |
| ;  - the store of 1 to @IsSPMDMode is removed, while the i8 store to |
| ;    addrspace(3) null is kept; |
| ;  - the load/compare/branch on @IsSPMDMode collapses to an unconditional branch |
| ;    to the returning block, with the unreachable block still present. |
| define internal i32 @__kmpc_target_init(ptr %0) { |
| ; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init |
| ; AMDGPU-SAME: (ptr [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8, ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr), i64 2), align 2 |
| ; AMDGPU-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], 2 |
| ; AMDGPU-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 |
| ; AMDGPU-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR3:[0-9]+]] |
| ; AMDGPU-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0 |
| ; AMDGPU-NEXT: [[OR_COND:%.*]] = select i1 [[TMP4]], i1 [[TMP6]], i1 false |
| ; AMDGPU-NEXT: br i1 [[OR_COND]], label [[TMP7:%.*]], label [[TMP8:%.*]] |
| ; AMDGPU: 7: |
| ; AMDGPU-NEXT: store i8 0, ptr addrspace(3) null, align 2147483648 |
| ; AMDGPU-NEXT: br label [[TMP8]] |
| ; AMDGPU: 8: |
| ; AMDGPU-NEXT: br label [[TMP10:%.*]] |
| ; AMDGPU: 9: |
| ; AMDGPU-NEXT: unreachable |
| ; AMDGPU: 10: |
| ; AMDGPU-NEXT: ret i32 0 |
| ; |
| ; Load the third i8 field (index 2) of the configuration struct and test bit 1 |
| ; (mask value 2). |
| %2 = getelementptr %struct.ConfigurationEnvironmentTy.8, ptr %0, i64 0, i32 2 |
| %3 = load i8, ptr %2, align 2 |
| %4 = and i8 %3, 2 |
| %5 = icmp ne i8 %4, 0 |
| ; Check whether this work-item has x id 0. |
| %6 = tail call i32 @llvm.amdgcn.workitem.id.x() |
| %7 = icmp eq i32 %6, 0 |
| ; select-with-false form of a logical AND: true only if the bit is set and the |
| ; work-item x id is 0. |
| %or.cond = select i1 %5, i1 %7, i1 false |
| br i1 %or.cond, label %8, label %9 |
| |
| 8: ; preds = %1 |
| ; Guarded stores: set @IsSPMDMode to 1, then write an i8 zero to the null |
| ; pointer of address space 3 with alignment 2147483648. |
| store i32 1, ptr addrspace(3) @IsSPMDMode, align 4 |
| store i8 0, ptr addrspace(3) null, align 2147483648 |
| br label %9 |
| |
| 9: ; preds = %8, %1 |
| ; Read the flag back; a zero value branches to the unreachable block %12, |
| ; anything else to the returning block %13. |
| %10 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 |
| %11 = icmp eq i32 %10, 0 |
| br i1 %11, label %12, label %13 |
| |
| 12: ; preds = %9 |
| unreachable |
| |
| 13: ; preds = %9 |
| ret i32 0 |
| } |
| |
| ; Target intrinsic called by @__kmpc_target_init; its i32 result is compared |
| ; against 0 there. |
| ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) |
| declare i32 @llvm.amdgcn.workitem.id.x() #0 |
| |
| ; External declaration only (no body in this module); called at the end of the |
| ; kernel. |
| declare void @__kmpc_target_deinit() |
| |
| ; Kernel entry point (amdgpu_kernel calling convention). It calls |
| ; @__kmpc_target_init with the kernel environment global cast from address |
| ; space 1 to the generic address space, then @fputs, then @__kmpc_target_deinit. |
| ; The check lines below expect the same instruction sequence after the pass, |
| ; with attribute groups added to the init call and to the @fputs call. |
| define amdgpu_kernel void @__omp_offloading_10302_b20a40e_main_l4() { |
| ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_10302_b20a40e_main_l4() { |
| ; AMDGPU-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr)) #[[ATTR4:[0-9]+]] |
| ; AMDGPU-NEXT: br label [[TMP2:%.*]] |
| ; AMDGPU: 2: |
| ; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @fputs() #[[ATTR0]] |
| ; AMDGPU-NEXT: tail call void @__kmpc_target_deinit() |
| ; AMDGPU-NEXT: ret void |
| ; |
| ; The i32 result of the init call is not used by any later instruction. |
| %1 = tail call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_10302_b20a40e_main_l4_kernel_environment to ptr)) |
| br label %2 |
| |
| 2: ; preds = %0 |
| ; The result of @fputs is likewise unused. |
| %3 = call i32 @fputs() |
| tail call void @__kmpc_target_deinit() |
| ret void |
| } |
| |
| ; Attribute group referenced by the @llvm.amdgcn.workitem.id.x declaration. |
| attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
| |
| ; Module flags: a single "openmp" flag with value 51 and merge behavior 7. |
| ; NOTE(review): the pass under test presumably requires this flag in order to |
| ; treat the module as OpenMP device code -- confirm before removing it. |
| !llvm.module.flags = !{!0} |
| |
| !0 = !{i32 7, !"openmp", i32 51} |
| |
| ;. |
| ; AMDGPU: attributes #[[ATTR0]] = { nounwind } |
| ; AMDGPU: attributes #[[ATTR1]] = { norecurse nosync nounwind } |
| ; AMDGPU: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
| ; AMDGPU: attributes #[[ATTR3]] = { nosync } |
| ; AMDGPU: attributes #[[ATTR4]] = { nosync nounwind } |
| ;. |
| ; AMDGPU: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 51} |
| ;. |