| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=CHECK %s |
| |
| declare void @function1() |
| |
| declare void @function2() #0 |
| |
| ; Function Attrs: noinline |
| define void @function3(ptr addrspace(4) %argptr, ptr addrspace(1) %sink) #2 { |
| store ptr addrspace(4) %argptr, ptr addrspace(1) %sink, align 8 |
| ret void |
| } |
| |
| ; Function Attrs: noinline |
| define void @function4(i64 %arg, ptr %a) #2 { |
| store i64 %arg, ptr %a |
| ret void |
| } |
| |
| ; Function Attrs: noinline |
| define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 { |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 64 |
| %x = load i64, ptr addrspace(4) %gep |
| store i64 %x, ptr %sink |
| ret void |
| } |
| |
| ; Function Attrs: nounwind readnone speculatable willreturn |
| declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1 |
| |
| ; CHECK: amdhsa.kernels: |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel10 |
| define amdgpu_kernel void @test_kernel10(ptr %a) { |
| store i8 3, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Call to an extern function |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel20 |
| define amdgpu_kernel void @test_kernel20(ptr %a) { |
| call void @function1() |
| store i8 3, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Explicit attribute on kernel |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel21 |
| define amdgpu_kernel void @test_kernel21(ptr %a) #0 { |
| call void @function1() |
| store i8 3, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Explicit attribute on extern callee |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel22 |
| define amdgpu_kernel void @test_kernel22(ptr %a) { |
| call void @function2() |
| store i8 3, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Access more bytes than the pointer size |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel30 |
| define amdgpu_kernel void @test_kernel30(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88 |
| %x = load i128, ptr addrspace(4) %gep |
| store i128 %x, ptr %a |
| ret void |
| } |
| |
| ; Typical load of heap buffer pointer |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel40 |
| define amdgpu_kernel void @test_kernel40(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96 |
| %x = load i64, ptr addrspace(4) %gep |
| store i64 %x, ptr %a |
| ret void |
| } |
| |
| ; Typical usage, overriden by explicit attribute on kernel |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel41 |
| define amdgpu_kernel void @test_kernel41(ptr %a) #0 { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96 |
| %x = load i64, ptr addrspace(4) %gep |
| store i64 %x, ptr %a |
| ret void |
| } |
| |
| ; Access to implicit arg before the heap pointer |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel42 |
| define amdgpu_kernel void @test_kernel42(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 88 |
| %x = load i64, ptr addrspace(4) %gep |
| store i64 %x, ptr %a |
| ret void |
| } |
| |
| ; Access to implicit arg after the heap pointer |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel43 |
| define amdgpu_kernel void @test_kernel43(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 104 |
| %x = load i64, ptr addrspace(4) %gep |
| store i64 %x, ptr %a |
| ret void |
| } |
| |
| ; Accessing a byte just before the heap pointer |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel44 |
| define amdgpu_kernel void @test_kernel44(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 95 |
| %x = load i8, ptr addrspace(4) %gep, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Accessing a byte inside the heap pointer |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel45 |
| define amdgpu_kernel void @test_kernel45(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96 |
| %x = load i8, ptr addrspace(4) %gep, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Accessing a byte inside the heap pointer |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel46 |
| define amdgpu_kernel void @test_kernel46(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 103 |
| %x = load i8, ptr addrspace(4) %gep, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Accessing a byte just after the heap pointer |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel47 |
| define amdgpu_kernel void @test_kernel47(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 104 |
| %x = load i8, ptr addrspace(4) %gep, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Access with an unknown offset |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel50 |
| define amdgpu_kernel void @test_kernel50(ptr %a, i32 %b) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 %b |
| %x = load i8, ptr addrspace(4) %gep, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Multiple geps reaching the heap pointer argument. |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel51 |
| define amdgpu_kernel void @test_kernel51(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16 |
| %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 80 |
| %x = load i8, ptr addrspace(4) %gep2, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Multiple geps not reaching the heap pointer argument. |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel52 |
| define amdgpu_kernel void @test_kernel52(ptr %a) { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep1 = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 16 |
| %gep2 = getelementptr inbounds i8, ptr addrspace(4) %gep1, i64 16 |
| %x = load i8, ptr addrspace(4) %gep2, align 1 |
| store i8 %x, ptr %a, align 1 |
| ret void |
| } |
| |
| ; Heap pointer used inside a function call |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel60 |
| define amdgpu_kernel void @test_kernel60(ptr %a) #2 { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 96 |
| %x = load i64, ptr addrspace(4) %gep |
| call void @function4(i64 %x, ptr %a) |
| ret void |
| } |
| |
| ; Heap pointer retrieved inside a function call; chain of geps |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel61 |
| define amdgpu_kernel void @test_kernel61(ptr %a) #2 { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i64 32 |
| call void @function5(ptr addrspace(4) %gep, ptr %a) |
| ret void |
| } |
| |
| ; Pointer captured |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel70 |
| define amdgpu_kernel void @test_kernel70(ptr addrspace(1) %sink) #2 { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42 |
| store ptr addrspace(4) %gep, ptr addrspace(1) %sink, align 8 |
| ret void |
| } |
| |
| ; Pointer captured inside function call |
| |
| ; CHECK: - .args: |
| ; CHECK: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel71 |
| define amdgpu_kernel void @test_kernel71(ptr addrspace(1) %sink) #2 { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42 |
| call void @function3(ptr addrspace(4) %gep, ptr addrspace(1) %sink) |
| ret void |
| } |
| |
| ; Ineffective pointer capture |
| |
| ; CHECK: - .args: |
| ; CHECK-NOT: hidden_heap_v1 |
| ; CHECK-LABEL: .name: test_kernel72 |
| define amdgpu_kernel void @test_kernel72() #2 { |
| %ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() |
| %gep = getelementptr inbounds i8, ptr addrspace(4) %ptr, i32 42 |
| store ptr addrspace(4) %gep, ptr addrspace(1) undef, align 8 |
| ret void |
| } |
| |
| attributes #0 = { "amdgpu-no-heap-ptr" } |
| attributes #1 = { nounwind readnone speculatable willreturn } |
| attributes #2 = { noinline } |
| |
| !llvm.module.flags = !{!0} |
| !0 = !{i32 1, !"amdgpu_code_object_version", i32 500} |