; Verifies the hidden kernel-argument entries written to the AMDHSA metadata
; note for several "amdgpu-implicitarg-num-bytes" values. The module is
; compiled to an object file for gfx700, gfx803 and gfx900, the notes are
; dumped with llvm-readelf, and the dump is matched against the directives
; placed next to each kernel below.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s

; CHECK:              ---
; CHECK:      amdhsa.kernels:
|  |  | 
; test0 carries no attribute group, so no "amdgpu-implicitarg-num-bytes"
; value is set on it. Only its three explicit global pointers are matched:
; 8 bytes each, at offsets 0, 8 and 16.
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK:          .name:           test0
; CHECK:          .symbol:         test0.kd
define amdgpu_kernel void @test0(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; test8 uses attribute group #0, which requests 8 implicit-argument bytes.
; After the three explicit pointers (offsets 0, 8, 16) exactly one hidden
; argument is expected: hidden_global_offset_x at offset 24.
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .offset:         24
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
; CHECK:          .name:           test8
; CHECK:          .symbol:         test8.kd
define amdgpu_kernel void @test8(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) #0 {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; test16 uses attribute group #1, which requests 16 implicit-argument bytes.
; Two hidden arguments are expected after the explicit pointers:
; hidden_global_offset_x at offset 24 and hidden_global_offset_y at offset 32.
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .offset:         24
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
; CHECK-NEXT:       - .offset:         32
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
; CHECK:          .name:           test16
; CHECK:          .symbol:         test16.kd
define amdgpu_kernel void @test16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) #1 {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; test24 uses attribute group #2, which requests 24 implicit-argument bytes.
; Three hidden arguments are expected after the explicit pointers: the
; global offsets x, y and z at offsets 24, 32 and 40.
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .offset:         24
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
; CHECK-NEXT:       - .offset:         32
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
; CHECK-NEXT:       - .offset:         40
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
; CHECK:          .name:           test24
; CHECK:          .symbol:         test24.kd
define amdgpu_kernel void @test24(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) #2 {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; test32 uses attribute group #3, which requests 32 implicit-argument bytes.
; Four hidden arguments are expected after the explicit pointers: the
; global offsets x, y and z (offsets 24, 32, 40) followed by
; hidden_hostcall_buffer at offset 48.
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .offset:         24
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
; CHECK-NEXT:       - .offset:         32
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
; CHECK-NEXT:       - .offset:         40
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
; CHECK-NEXT:       - .offset:         48
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
; CHECK:          .name:           test32
; CHECK:          .symbol:         test32.kd
define amdgpu_kernel void @test32(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) #3 {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; test48 uses attribute group #4, which requests 48 implicit-argument bytes.
; Six hidden arguments are expected after the explicit pointers: the
; global offsets x, y and z (offsets 24, 32, 40), hidden_hostcall_buffer (48),
; hidden_default_queue (56) and hidden_completion_action (64).
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .offset:         24
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
; CHECK-NEXT:       - .offset:         32
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
; CHECK-NEXT:       - .offset:         40
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
; CHECK-NEXT:       - .offset:         48
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
; CHECK-NEXT:       - .offset:         56
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_default_queue
; CHECK-NEXT:       - .offset:         64
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_completion_action
; CHECK:          .name:           test48
; CHECK:          .symbol:         test48.kd
define amdgpu_kernel void @test48(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) #4 {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; test56 uses attribute group #5, which requests 56 implicit-argument bytes.
; Seven hidden arguments are expected after the explicit pointers: the
; global offsets x, y and z (offsets 24, 32, 40), hidden_hostcall_buffer (48),
; hidden_default_queue (56), hidden_completion_action (64) and
; hidden_multigrid_sync_arg (72).
; CHECK:        - .args:
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           r
; CHECK-NEXT:         .offset:         0
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           a
; CHECK-NEXT:         .offset:         8
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .address_space:  global
; CHECK-NEXT:         .name:           b
; CHECK-NEXT:         .offset:         16
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     global_buffer
; CHECK-NEXT:       - .offset:         24
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
; CHECK-NEXT:       - .offset:         32
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
; CHECK-NEXT:       - .offset:         40
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
; CHECK-NEXT:       - .offset:         48
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
; CHECK-NEXT:       - .offset:         56
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_default_queue
; CHECK-NEXT:       - .offset:         64
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_completion_action
; CHECK-NEXT:       - .offset:         72
; CHECK-NEXT:         .size:           8
; CHECK-NEXT:         .value_kind:     hidden_multigrid_sync_arg
; CHECK:          .name:           test56
; CHECK:          .symbol:         test56.kd
define amdgpu_kernel void @test56(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) #5 {
entry:
  ; *r = *a + *b, computed on half values.
  %a.val = load half, ptr addrspace(1) %a
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}
|  |  | 
; The metadata version sequence that follows the kernel list must be 1, 1.
; CHECK:  amdhsa.version:
; CHECK-NEXT: - 1
; CHECK-NEXT: - 1
|  |  | 
; No kernel in this file uses llvm.amdgcn.implicitarg.ptr, so every group is
; marked optnone (together with the noinline that optnone requires) to avoid
; optimizing out the implicit argument allocation. The groups differ only in
; the implicit-argument byte count they request.
attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="8" }
attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="16" }
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="24" }
attributes #3 = { optnone noinline "amdgpu-implicitarg-num-bytes"="32" }
attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
|  |  | 
; Module flag selecting the AMDHSA code object version; 400 denotes v4.
; The leading i32 1 is the "error" merge behavior for conflicting values.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}