| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-kernel-attributes %s | FileCheck %s |
| |
| ; Baseline case: an i32 load at offset 0 of the implicitarg pointer in a function |
| ; carrying attribute group #0 ("amdgpu-max-num-workgroups"="36,42,89"). |
| ; The checks expect the load to gain !range RNG0, i.e. !{i32 1, i32 37} (the |
| ; half-open interval [1, 37)), which is consistent with the first limit, 36. |
| define i32 @use_grid_size_x_max_num_workgroups() #0 { |
| ; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups( |
| ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0:![0-9]+]] |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_X]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4 |
| ret i32 %grid.size.x |
| } |
| |
| ; Same load as the baseline x case, but the input load already carries |
| ; !range !0 (!{i32 0, i32 -1}). The checks expect the output load to carry RNG0 |
| ; (!{i32 1, i32 37}) rather than the original metadata. |
| ; NOTE(review): whether the pass intersects with or simply overwrites the |
| ; existing range cannot be told from this test alone - confirm in the pass. |
| define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range() #0 { |
| ; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0]] |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_X]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4, !range !0 |
| ret i32 %grid.size.x |
| } |
| |
| ; Y-dimension case: an i32 load at byte offset 4 from the implicitarg pointer, |
| ; with attribute group #0 ("amdgpu-max-num-workgroups"="36,42,89"). |
| ; The checks expect !range RNG1, i.e. !{i32 1, i32 43}, which is consistent |
| ; with the second limit, 42. The GEP itself is expected to be left unchanged. |
| define i32 @use_grid_size_y_max_num_workgroups() #0 { |
| ; CHECK-LABEL: define i32 @use_grid_size_y_max_num_workgroups( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GEP_GRID_SIZE_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 4 |
| ; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Y]], align 4, !range [[RNG1:![0-9]+]] |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_Y]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep.grid.size.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4 |
| %grid.size.y = load i32, ptr addrspace(4) %gep.grid.size.y, align 4 |
| ret i32 %grid.size.y |
| } |
| |
| ; Z-dimension case: an i32 load at byte offset 8 from the implicitarg pointer, |
| ; with attribute group #0 ("amdgpu-max-num-workgroups"="36,42,89"). |
| ; The checks expect !range RNG2, i.e. !{i32 1, i32 90}, which is consistent |
| ; with the third limit, 89. The GEP itself is expected to be left unchanged. |
| define i32 @use_grid_size_z_max_num_workgroups() #0 { |
| ; CHECK-LABEL: define i32 @use_grid_size_z_max_num_workgroups( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GEP_GRID_SIZE_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 8 |
| ; CHECK-NEXT: [[GRID_SIZE_Z:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Z]], align 4, !range [[RNG2:![0-9]+]] |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_Z]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep.grid.size.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8 |
| %grid.size.z = load i32, ptr addrspace(4) %gep.grid.size.z, align 4 |
| ret i32 %grid.size.z |
| } |
| |
| ; Negative case: the load at offset 0 of the implicitarg pointer has type |
| ; <2 x i16> instead of i32. The checks expect the load to come out unchanged, |
| ; with no !range metadata attached, even though attribute group #0 is present. |
| define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type() #0 { |
| ; CHECK-LABEL: define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type( |
| ; CHECK-SAME: ) #[[ATTR0]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load <2 x i16>, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4 |
| ; CHECK-NEXT: ret <2 x i16> [[GRID_SIZE_X]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %grid.size.x = load <2 x i16>, ptr addrspace(4) %implicitarg.ptr, align 4 |
| ret <2 x i16> %grid.size.x |
| } |
| |
| ; Boundary case: attribute group #1 sets the first limit to 4294967294, one |
| ; below the largest unsigned 32-bit value. The checks still expect a !range on |
| ; the offset-0 load: RNG3, i.e. !{i32 1, i32 -1}, where the i32 -1 upper bound |
| ; is the bit pattern of 4294967295. |
| define i32 @use_grid_size_x_max_num_workgroups_max_minus_1() #1 { |
| ; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max_minus_1( |
| ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG3:![0-9]+]] |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_X]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4 |
| ret i32 %grid.size.x |
| } |
| |
| ; Boundary case: attribute group #2 sets the first limit to 4294967295, the |
| ; largest unsigned 32-bit value. The checks expect the offset-0 load to be left |
| ; without any !range metadata. |
| ; NOTE(review): presumably because an exclusive upper bound of limit + 1 is not |
| ; representable in i32 - confirm against the pass implementation. |
| define i32 @use_grid_size_x_max_num_workgroups_max() #2 { |
| ; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max( |
| ; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4 |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_X]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4 |
| ret i32 %grid.size.x |
| } |
| |
| ; Boundary case: attribute group #3 sets the first limit to 0. The checks |
| ; expect the offset-0 load to be left without any !range metadata. |
| ; NOTE(review): presumably a zero limit is treated as "no usable bound" - |
| ; confirm against the pass implementation. |
| define i32 @use_grid_size_x_max_num_workgroups_zero() #3 { |
| ; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_zero( |
| ; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { |
| ; CHECK-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| ; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4 |
| ; CHECK-NEXT: ret i32 [[GRID_SIZE_X]] |
| ; |
| %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4 |
| ret i32 %grid.size.x |
| } |
| |
| ; Intrinsic that every test function calls to obtain the base pointer it loads from. |
| ; NOTE(review): this declaration references attribute group #3, which is the |
| ; "amdgpu-max-num-workgroups"="0,42,89" group used by the *_zero test. That looks |
| ; unintentional for an intrinsic declaration - confirm before changing, since |
| ; the autogenerated attribute checks at the end of the file depend on the numbering. |
| declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3 |
| |
| ; Attribute groups under test; only the first (x) limit differs between them: |
| ; #0 small limit, #1 largest 32-bit value minus one, #2 largest 32-bit value, #3 zero. |
| attributes #0 = { "amdgpu-max-num-workgroups"="36,42,89" } |
| attributes #1 = { "amdgpu-max-num-workgroups"="4294967294,42,89" } |
| attributes #2 = { "amdgpu-max-num-workgroups"="4294967295,42,89" } |
| attributes #3 = { "amdgpu-max-num-workgroups"="0,42,89" } |
| |
| ; Pre-existing range attached to the load in the *_existing_nonzero_range test. |
| !0 = !{i32 0, i32 -1} |
| |
| ;. |
| ; CHECK: attributes #[[ATTR0]] = { "amdgpu-max-num-workgroups"="36,42,89" } |
| ; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="4294967294,42,89" } |
| ; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="4294967295,42,89" } |
| ; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="0,42,89" } |
| ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } |
| ;. |
| ; CHECK: [[RNG0]] = !{i32 1, i32 37} |
| ; CHECK: [[RNG1]] = !{i32 1, i32 43} |
| ; CHECK: [[RNG2]] = !{i32 1, i32 90} |
| ; CHECK: [[RNG3]] = !{i32 1, i32 -1} |
| ;. |