| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %} |
| |
| ; Test case: f32 atomic add through a plain `ptr` (no addrspace qualifier). |
| ; The assertions below expect this to lower to an `atom.add.f32` instruction |
| ; with no state-space qualifier, and the result to be stored to func_retval0. |
| declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val) |
| |
| ; CHECK-LABEL: atomic_add_f32_generic |
| define float @atomic_add_f32_generic(ptr %addr, float %val) { |
| ; CHECK-LABEL: atomic_add_f32_generic( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_generic_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_generic_param_1]; |
| ; CHECK-NEXT: atom.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| ; Both arguments are forwarded unchanged to the intrinsic; its result is returned. |
| %ret = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val) |
| ret float %ret |
| } |
| |
| ; Test case: f32 atomic add through a `ptr addrspace(1)` pointer. |
| ; The assertions below expect this to lower to `atom.global.add.f32`, i.e. the |
| ; instruction carries the `.global` qualifier, and the result to be stored to |
| ; func_retval0. |
| declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val) |
| |
| ; CHECK-LABEL: atomic_add_f32_addrspace1 |
| define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) { |
| ; CHECK-LABEL: atomic_add_f32_addrspace1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace1_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace1_param_1]; |
| ; CHECK-NEXT: atom.global.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| ; Both arguments are forwarded unchanged to the intrinsic; its result is returned. |
| %ret = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val) |
| ret float %ret |
| } |
| |
| ; Test case: f32 atomic add through a `ptr addrspace(3)` pointer. |
| ; The assertions below expect this to lower to `atom.shared.add.f32`, i.e. the |
| ; instruction carries the `.shared` qualifier, and the result to be stored to |
| ; func_retval0. |
| declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val) |
| |
| ; CHECK-LABEL: atomic_add_f32_addrspace3 |
| define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) { |
| ; CHECK-LABEL: atomic_add_f32_addrspace3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace3_param_1]; |
| ; CHECK-NEXT: atom.shared.add.f32 %r2, [%rd1], %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| ; Both arguments are forwarded unchanged to the intrinsic; its result is returned. |
| %ret = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val) |
| ret float %ret |
| } |