| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals |
| ; RUN: opt -S -mtriple=amdgcn-amd- -passes=amdgpu-attributor %s | FileCheck %s |
| |
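| ; Test to ensure the amdgpu-attributor pass handles recursive functions |
| ; correctly when inferring function attributes such as |
| ; "uniform-work-group-size". |
| ; The recursive functions under test generate Fibonacci-style numbers. |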
| |
| ; @fib: recursive function with default (non-internal) linkage. |
| ; Returns 1 when %n is 0 or 1; otherwise returns @fib(%n - 1) + @fib(%n - 2). |
| ; The checks expect it to end up in attribute group ATTR0, which carries |
| ; "uniform-work-group-size"="false" (see the attribute CHECK lines at the end |
| ; of the file). |
| ; NOTE(review): presumably "false" because callers outside this module cannot |
| ; be ruled out for a non-internal function -- confirm against the pass. |
| define i32 @fib(i32 %n) #0 { |
| ; CHECK-LABEL: define {{[^@]+}}@fib |
| ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 |
| ; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] |
| ; CHECK: cont1: |
| ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 |
| ; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] |
| ; CHECK: cont2: |
| ; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 |
| ; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]]) |
| ; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 |
| ; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]]) |
| ; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] |
| ; CHECK-NEXT: ret i32 [[RETVAL]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret i32 1 |
| ; |
| ; First base case: %n == 0 branches to %exit. |
| %cmp1 = icmp eq i32 %n, 0 |
| br i1 %cmp1, label %exit, label %cont1 |
| |
| ; Second base case: %n == 1 also branches to %exit. |
| cont1: |
| %cmp2 = icmp eq i32 %n, 1 |
| br i1 %cmp2, label %exit, label %cont2 |
| |
| ; Recursive case: two self-calls whose results are summed. |
| cont2: |
| %nm1 = sub i32 %n, 1 |
| %fibm1 = call i32 @fib(i32 %nm1) |
| %nm2 = sub i32 %n, 2 |
| %fibm2 = call i32 @fib(i32 %nm2) |
| %retval = add i32 %fibm1, %fibm2 |
| |
| ret i32 %retval |
| |
| ; Shared return for both base cases. |
| exit: |
| ret i32 1 |
| } |
| |
| ; @fib_internal: same recursion as @fib, but with internal linkage and |
| ; recursing into itself. |
| ; Returns 1 when %n is 0 or 1; otherwise returns |
| ; @fib_internal(%n - 1) + @fib_internal(%n - 2). |
| ; Besides its own recursive calls, the only call site in this file is in |
| ; @kernel. The checks expect attribute group ATTR1, which carries |
| ; "uniform-work-group-size"="true" (see the attribute CHECK lines at the end |
| ; of the file). |
| define internal i32 @fib_internal(i32 %n) #0 { |
| ; CHECK-LABEL: define {{[^@]+}}@fib_internal |
| ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 |
| ; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] |
| ; CHECK: cont1: |
| ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 |
| ; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] |
| ; CHECK: cont2: |
| ; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 |
| ; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) |
| ; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 |
| ; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) |
| ; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] |
| ; CHECK-NEXT: ret i32 [[RETVAL]] |
| ; CHECK: exit: |
| ; CHECK-NEXT: ret i32 1 |
| ; |
| ; First base case: %n == 0 branches to %exit. |
| %cmp1 = icmp eq i32 %n, 0 |
| br i1 %cmp1, label %exit, label %cont1 |
| |
| ; Second base case: %n == 1 also branches to %exit. |
| cont1: |
| %cmp2 = icmp eq i32 %n, 1 |
| br i1 %cmp2, label %exit, label %cont2 |
| |
| ; Recursive case: two self-calls whose results are summed. |
| cont2: |
| %nm1 = sub i32 %n, 1 |
| %fibm1 = call i32 @fib_internal(i32 %nm1) |
| %nm2 = sub i32 %n, 2 |
| %fibm2 = call i32 @fib_internal(i32 %nm2) |
| %retval = add i32 %fibm1, %fibm2 |
| |
| ret i32 %retval |
| |
| ; Shared return for both base cases. |
| exit: |
| ret i32 1 |
| } |
| |
| ; @kernel: amdgpu_kernel entry point that calls both recursive functions with |
| ; the constant argument 5. |
| ; Its input attribute group #1 sets "uniform-work-group-size"="true"; the |
| ; checks expect that value to be kept in ATTR2 alongside the inferred |
| ; attributes listed in the attribute CHECK lines at the end of the file. |
| define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 { |
| ; CHECK-LABEL: define {{[^@]+}}@kernel |
| ; CHECK-SAME: (ptr addrspace(1) [[M:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) |
| ; CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5) |
| ; CHECK-NEXT: store i32 [[R]], ptr addrspace(1) [[M]], align 4 |
| ; CHECK-NEXT: store i32 [[R2]], ptr addrspace(1) [[M]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| ; One call to the non-internal function and one to the internal function. |
| %r = call i32 @fib(i32 5) |
| %r2 = call i32 @fib_internal(i32 5) |
| |
| ; Both results are stored through the same pointer %m; the stores keep the |
| ; call results in use. |
| store i32 %r, ptr addrspace(1) %m |
| store i32 %r2, ptr addrspace(1) %m |
| ret void |
| } |
| |
| ; #0 is used by both @fib and @fib_internal. |
| ; nounwind and readnone are added to match attributor results; readnone |
| ; appears as memory(none) in the expected attribute groups below. |
| attributes #0 = { nounwind readnone } |
| ; #1 is used by @kernel and marks it with a uniform work-group size. |
| attributes #1 = { "uniform-work-group-size"="true" } |
| ;. |
| ; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| ; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } |
| ; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="true" } |
| ;. |