blob: 3820b13f7da1f7162f93d4d6ed682d33cb8ac417 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_32 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_32 | %ptxas-verify %}
declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
; CHECK-LABEL: atomic_add_f32_generic
define float @atomic_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_generic(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_generic_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_generic_param_1];
; CHECK-NEXT: atom.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%ret = call float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
ret float %ret
}
declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
; CHECK-LABEL: atomic_add_f32_addrspace1
define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace1_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace1_param_1];
; CHECK-NEXT: atom.global.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%ret = call float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %val)
ret float %ret
}
declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
; CHECK-LABEL: atomic_add_f32_addrspace3
define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace3(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [atomic_add_f32_addrspace3_param_0];
; CHECK-NEXT: ld.param.b32 %r1, [atomic_add_f32_addrspace3_param_1];
; CHECK-NEXT: atom.shared.add.f32 %r2, [%rd1], %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
%ret = call float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %val)
ret float %ret
}