| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s |
| |
| ; Check propagation of the amdgpu-waves-per-eu attribute. |
| |
| ; Called from kernels with 1,8 (@kernel_1_8) and 2,8 (@kernel_2_8) => 1,8 |
| define internal void @default_to_1_8_a() { |
| ; CHECK-LABEL: define internal void @default_to_1_8_a |
| ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 1,8 (#0); calls only @default_to_1_8_a. |
| define amdgpu_kernel void @kernel_1_8() #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_8 |
| ; CHECK-SAME: () #[[ATTR0]] { |
| ; CHECK-NEXT: call void @default_to_1_8_a() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @default_to_1_8_a() |
| ret void |
| } |
| |
| ; Called from a single kernel (@kernel_1_2) with 1,2 => 1,2 |
| define internal void @default_to_1_2() { |
| ; CHECK-LABEL: define internal void @default_to_1_2 |
| ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 1,2 (#1). Calls two functions that have no |
| ; attribute on input (@default_to_1_2, @default_to_1_8_b) and two that |
| ; already carry their own range (@flat_group_1_1 with 1,1 and |
| ; @flat_group_2_8 with 2,8). |
| define amdgpu_kernel void @kernel_1_2() #1 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2 |
| ; CHECK-SAME: () #[[ATTR1]] { |
| ; CHECK-NEXT: call void @default_to_1_2() |
| ; CHECK-NEXT: call void @flat_group_1_1() |
| ; CHECK-NEXT: call void @default_to_1_8_b() |
| ; CHECK-NEXT: call void @flat_group_2_8() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @default_to_1_2() |
| call void @flat_group_1_1() |
| call void @default_to_1_8_b() |
| call void @flat_group_2_8() |
| ret void |
| } |
| |
| ; Called from a single kernel (@kernel_1_4) with 1,4 => 1,4 |
| define internal void @default_to_1_4() { |
| ; CHECK-LABEL: define internal void @default_to_1_4 |
| ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 1,4 (#2); calls only @default_to_1_4. |
| define amdgpu_kernel void @kernel_1_4() #2 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_1_4 |
| ; CHECK-SAME: () #[[ATTR2]] { |
| ; CHECK-NEXT: call void @default_to_1_4() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @default_to_1_4() |
| ret void |
| } |
| |
| ; Called from kernels with 2,9 (@kernel_2_9) and 9,9 (@kernel_9_9) => 2,9 |
| define internal void @default_to_2_9() { |
| ; CHECK-LABEL: define internal void @default_to_2_9 |
| ; CHECK-SAME: () #[[ATTR3:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; This already has strict bounds (1,1) but is called from kernels with |
| ; wider bounds (@kernel_1_2 with 1,2 and @kernel_2_9 with 2,9); it should |
| ; not be changed. |
| define internal void @flat_group_1_1() #3 { |
| ; CHECK-LABEL: define internal void @flat_group_1_1 |
| ; CHECK-SAME: () #[[ATTR4:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; 2,8 -> 2,2 |
| ; Only caller is @kernel_1_2 (1,2). |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 2,8 (it shares an attribute group with @kernel_2_8), not the 2,2 noted |
| ; above. |
| define internal void @flat_group_2_8() #4 { |
| ; CHECK-LABEL: define internal void @flat_group_2_8 |
| ; CHECK-SAME: () #[[ATTR5:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; 9,10 -> 9,9 |
| ; Only caller is @kernel_9_9 (9,9). |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 9,10, not the 9,9 noted above. |
| define internal void @flat_group_9_10() #5 { |
| ; CHECK-LABEL: define internal void @flat_group_9_10 |
| ; CHECK-SAME: () #[[ATTR6:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 2,9 (#6); calls @default_to_2_9 and |
| ; @flat_group_1_1. |
| define amdgpu_kernel void @kernel_2_9() #6 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_2_9 |
| ; CHECK-SAME: () #[[ATTR3]] { |
| ; CHECK-NEXT: call void @default_to_2_9() |
| ; CHECK-NEXT: call void @flat_group_1_1() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @default_to_2_9() |
| call void @flat_group_1_1() |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 9,9 (#7); calls @default_to_2_9 and |
| ; @flat_group_9_10. |
| define amdgpu_kernel void @kernel_9_9() #7 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9 |
| ; CHECK-SAME: () #[[ATTR7:[0-9]+]] { |
| ; CHECK-NEXT: call void @default_to_2_9() |
| ; CHECK-NEXT: call void @flat_group_9_10() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @default_to_2_9() |
| call void @flat_group_9_10() |
| ret void |
| } |
| |
| ; Called from kernels with 2,8 (@kernel_2_8) and 1,2 (@kernel_1_2) => 1,8 |
| define internal void @default_to_1_8_b() { |
| ; CHECK-LABEL: define internal void @default_to_1_8_b |
| ; CHECK-SAME: () #[[ATTR0]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; The kernel's lower bound (2) is higher than the lower bound its callees |
| ; @default_to_1_8_a and @default_to_1_8_b are expected to get (1,8), so |
| ; this should probably be illegal. |
| define amdgpu_kernel void @kernel_2_8() #4 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8 |
| ; CHECK-SAME: () #[[ATTR5]] { |
| ; CHECK-NEXT: call void @default_to_1_8_a() |
| ; CHECK-NEXT: call void @default_to_1_8_b() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @default_to_1_8_a() |
| call void @default_to_1_8_b() |
| ret void |
| } |
| |
| ; 1,2 -> 2,2 |
| ; Forms a call cycle with @merge_cycle_1 and is also called from |
| ; @kernel_3_8 (3,8). |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 1,2, not the 2,2 noted above. |
| define internal void @merge_cycle_0() #1 { |
| ; CHECK-LABEL: define internal void @merge_cycle_0 |
| ; CHECK-SAME: () #[[ATTR1]] { |
| ; CHECK-NEXT: call void @merge_cycle_1() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @merge_cycle_1() |
| ret void |
| } |
| |
| ; Called directly from @merge_cycle_0 (1,2), which is itself called from |
| ; @kernel_3_8 (3,8); calls back into @merge_cycle_0. |
| ; 2,8 -> 2,8 |
| define internal void @merge_cycle_1() #4 { |
| ; CHECK-LABEL: define internal void @merge_cycle_1 |
| ; CHECK-SAME: () #[[ATTR5]] { |
| ; CHECK-NEXT: call void @merge_cycle_0() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @merge_cycle_0() |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 3,8 (#8). Calls into the merge cycle, a |
| ; function that stores its own address, an externally visible function, |
| ; and @bitcasted_function through a call whose return type (float) |
| ; differs from the definition's (i32). |
| define amdgpu_kernel void @kernel_3_8() #8 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_3_8 |
| ; CHECK-SAME: () #[[ATTR8:[0-9]+]] { |
| ; CHECK-NEXT: call void @merge_cycle_0() |
| ; CHECK-NEXT: call void @default_captured_address() |
| ; CHECK-NEXT: call void @externally_visible_default() |
| ; CHECK-NEXT: [[F32:%.*]] = call float @bitcasted_function() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @merge_cycle_0() |
| call void @default_captured_address() |
| call void @externally_visible_default() |
| %f32 = call float @bitcasted_function() |
| ret void |
| } |
| |
| ; Internal function with no attributes on input whose own address is |
| ; stored by a volatile store. Called from @kernel_3_8 (3,8); the checks |
| ; expect no amdgpu-waves-per-eu attribute to be added. |
| define internal void @default_captured_address() { |
| ; CHECK-LABEL: define internal void @default_captured_address |
| ; CHECK-SAME: () #[[ATTR9:[0-9]+]] { |
| ; CHECK-NEXT: store volatile ptr @default_captured_address, ptr undef, align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| store volatile ptr @default_captured_address, ptr undef, align 8 |
| ret void |
| } |
| |
| ; Externally visible function with no attributes on input. Called from |
| ; @kernel_3_8 (3,8); the checks expect no amdgpu-waves-per-eu attribute |
| ; to be added. |
| define void @externally_visible_default() { |
| ; CHECK-LABEL: define void @externally_visible_default |
| ; CHECK-SAME: () #[[ATTR9]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Defined as returning i32 but called as returning float from |
| ; @kernel_3_8 (3,8). It has no attribute on input. |
| ; NOTE(review): the 1,10 below is presumably the default range - confirm. |
| ; 1,10 -> 3,8 |
| define internal i32 @bitcasted_function() { |
| ; CHECK-LABEL: define internal i32 @bitcasted_function |
| ; CHECK-SAME: () #[[ATTR8]] { |
| ; CHECK-NEXT: ret i32 0 |
| ; |
| ret i32 0 |
| } |
| |
| ; Called only from @kernel_invalid_bounds_0_8, whose input range is 0,8. |
| ; The checks expect no amdgpu-waves-per-eu attribute on this function. |
| define internal void @called_from_invalid_bounds_0() { |
| ; CHECK-LABEL: define internal void @called_from_invalid_bounds_0 |
| ; CHECK-SAME: () #[[ATTR10:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Called only from @kernel_invalid_bounds_1_123, whose input range is |
| ; 1,123. The checks expect no amdgpu-waves-per-eu attribute on this |
| ; function. |
| define internal void @called_from_invalid_bounds_1() { |
| ; CHECK-LABEL: define internal void @called_from_invalid_bounds_1 |
| ; CHECK-SAME: () #[[ATTR10]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Invalid range for amdgpu-waves-per-eu (0,8). |
| ; NOTE(review): the checks below expect this kernel to end up in the same |
| ; attribute group as @kernel_1_8, i.e. with 1,8. |
| define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8 |
| ; CHECK-SAME: () #[[ATTR0]] { |
| ; CHECK-NEXT: call void @called_from_invalid_bounds_0() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @called_from_invalid_bounds_0() |
| ret void |
| } |
| |
| ; Invalid range for amdgpu-waves-per-eu (1,123). |
| ; The checks below expect the 1,123 value to be left on the kernel as is. |
| define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123 |
| ; CHECK-SAME: () #[[ATTR11:[0-9]+]] { |
| ; CHECK-NEXT: call void @called_from_invalid_bounds_1() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @called_from_invalid_bounds_1() |
| ret void |
| } |
| |
| ; XXX - Why is the maximum not 6? |
| ; The 512 maximum workgroup size implies a minimum occupancy of 2. The |
| ; implied minimum waves-per-eu should not be 3. |
| ; -> 2,10 |
| ; Externally visible; has flat work-group size 1,512 and no |
| ; amdgpu-waves-per-eu on input. Called from @kernel_3_6 (3,6). |
| ; NOTE(review): the checks below currently expect no amdgpu-waves-per-eu |
| ; attribute on this function at all, not the 2,10 noted above. |
| define void @larger_group_size_implies_lower_minimum() #11 { |
| ; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum |
| ; CHECK-SAME: () #[[ATTR12:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Kernel with flat work-group size 1,512 and waves-per-eu 3,6 (#12); |
| ; calls only @larger_group_size_implies_lower_minimum. |
| define amdgpu_kernel void @kernel_3_6() #12 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6 |
| ; CHECK-SAME: () #[[ATTR13:[0-9]+]] { |
| ; CHECK-NEXT: call void @larger_group_size_implies_lower_minimum() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @larger_group_size_implies_lower_minimum() |
| ret void |
| } |
| |
| ; 3,6 -> 6,9 |
| ; Called from @refine_lower_func_4_8 (4,8), @kernel_foo_6_8 (6,8) and |
| ; @kernel_bar_8_9 (8,9). |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 3,6, not the 6,9 noted above. |
| define internal void @refine_upper_func_3_6() #13 { |
| ; CHECK-LABEL: define internal void @refine_upper_func_3_6 |
| ; CHECK-SAME: () #[[ATTR14:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; 4,8 -> 6,8 |
| ; Only caller is @kernel_foo_6_8 (6,8); calls @refine_upper_func_3_6. |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 4,8, not the 6,8 noted above. |
| define internal void @refine_lower_func_4_8() #14 { |
| ; CHECK-LABEL: define internal void @refine_lower_func_4_8 |
| ; CHECK-SAME: () #[[ATTR15:[0-9]+]] { |
| ; CHECK-NEXT: call void @refine_upper_func_3_6() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @refine_upper_func_3_6() |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 6,8 (#15) and no flat work-group size. Calls |
| ; @refine_upper_func_3_6 (3,6), @refine_lower_func_4_8 (4,8) and |
| ; @func_9_10_a (9,10). |
| define amdgpu_kernel void @kernel_foo_6_8() #15 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8 |
| ; CHECK-SAME: () #[[ATTR16:[0-9]+]] { |
| ; CHECK-NEXT: call void @refine_upper_func_3_6() |
| ; CHECK-NEXT: call void @refine_lower_func_4_8() |
| ; CHECK-NEXT: call void @func_9_10_a() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @refine_upper_func_3_6() |
| call void @refine_lower_func_4_8() |
| call void @func_9_10_a() |
| ret void |
| } |
| |
| ; 5,5 -> 5,5 |
| ; Only caller is @kernel_bar_8_9 (8,9). |
| define internal void @func_5_5() #16 { |
| ; CHECK-LABEL: define internal void @func_5_5 |
| ; CHECK-SAME: () #[[ATTR17:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; 5,8 -> 8,8 |
| ; Only caller is @kernel_bar_8_9 (8,9). |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 5,8, not the 8,8 noted above. |
| define internal void @func_5_8() #17 { |
| ; CHECK-LABEL: define internal void @func_5_8 |
| ; CHECK-SAME: () #[[ATTR18:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; 9,10 -> 9,10 |
| ; Only caller is @kernel_foo_6_8 (6,8). |
| define internal void @func_9_10_a() #18 { |
| ; CHECK-LABEL: define internal void @func_9_10_a |
| ; CHECK-SAME: () #[[ATTR19:[0-9]+]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; 9,10 -> 9,9 |
| ; Only caller is @kernel_bar_8_9 (8,9). |
| ; NOTE(review): the checks below currently expect this function to keep |
| ; 9,10 (same attribute group as @func_9_10_a), not the 9,9 noted above. |
| define internal void @func_9_10_b() #18 { |
| ; CHECK-LABEL: define internal void @func_9_10_b |
| ; CHECK-SAME: () #[[ATTR19]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| ; Kernel with waves-per-eu 8,9 (#19) and no flat work-group size. Calls |
| ; @refine_upper_func_3_6 (3,6), @func_5_5 (5,5), @func_9_10_b (9,10), |
| ; @func_5_8 (5,8) and @externally_visible (no attributes). |
| define amdgpu_kernel void @kernel_bar_8_9() #19 { |
| ; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9 |
| ; CHECK-SAME: () #[[ATTR20:[0-9]+]] { |
| ; CHECK-NEXT: call void @refine_upper_func_3_6() |
| ; CHECK-NEXT: call void @func_5_5() |
| ; CHECK-NEXT: call void @func_9_10_b() |
| ; CHECK-NEXT: call void @func_5_8() |
| ; CHECK-NEXT: call void @externally_visible() |
| ; CHECK-NEXT: ret void |
| ; |
| call void @refine_upper_func_3_6() |
| call void @func_5_5() |
| call void @func_9_10_b() |
| call void @func_5_8() |
| call void @externally_visible() |
| ret void |
| } |
| |
| ; This is an optimization hint based on users, so it's not strictly |
| ; required that all callers be visible. |
| ; Called from @kernel_bar_8_9 (8,9). The checks below currently expect no |
| ; amdgpu-waves-per-eu attribute to be added to this function. |
| define void @externally_visible() { |
| ; CHECK-LABEL: define void @externally_visible |
| ; CHECK-SAME: () #[[ATTR9]] { |
| ; CHECK-NEXT: ret void |
| ; |
| ret void |
| } |
| |
| |
| ; Attribute groups #0-#10 use a 1 wave workgroup (flat work-group size |
| ; 1,64) so there is no interaction by the workgroup size on the implied |
| ; waves per EU. |
| ; Groups #11 and #12 use a flat work-group size of 1,512 instead (#11 has |
| ; no amdgpu-waves-per-eu), and groups #13-#19 specify only |
| ; amdgpu-waves-per-eu. |
| |
| attributes #0 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" } |
| attributes #1 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" } |
| attributes #2 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,4" } |
| attributes #3 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" } |
| attributes #4 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" } |
| attributes #5 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" } |
| attributes #6 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" } |
| attributes #7 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" } |
| attributes #8 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" } |
| attributes #9 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" } |
| attributes #10 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" } |
| attributes #11 = { "amdgpu-flat-work-group-size"="1,512" } |
| attributes #12 = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" } |
| attributes #13 = { "amdgpu-waves-per-eu"="3,6" } |
| attributes #14 = { "amdgpu-waves-per-eu"="4,8" } |
| attributes #15 = { "amdgpu-waves-per-eu"="6,8" } |
| attributes #16 = { "amdgpu-waves-per-eu"="5,5" } |
| attributes #17 = { "amdgpu-waves-per-eu"="5,8" } |
| attributes #18 = { "amdgpu-waves-per-eu"="9,10" } |
| attributes #19 = { "amdgpu-waves-per-eu"="8,9" } |
| ;. |
| ; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" } |
| ;. |