| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 |
| // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ |
| // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ |
| // RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=DEV %s |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \ |
| // RUN: -fatomic-fine-grained-memory -fatomic-ignore-denormal-mode \ |
| // RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=OPT %s |
| |
| #include "Inputs/cuda.h" |
| |
| // HOST-LABEL: define dso_local void @_Z12test_defaultPf( |
| // HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 |
| // HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: ret void |
| // |
| // DEV-LABEL: define dso_local void @_Z12test_defaultPf( |
| // DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { |
| // DEV-NEXT: [[ENTRY:.*:]] |
| // DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]] |
| // DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z12test_defaultPf( |
| // OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]] |
| // OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: ret void |
| // |
| __device__ __host__ void test_default(float *a) { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| |
| // HOST-LABEL: define dso_local void @_Z8test_onePf( |
| // HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 |
| // HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: ret void |
| // |
| // DEV-LABEL: define dso_local void @_Z8test_onePf( |
| // DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // DEV-NEXT: [[ENTRY:.*:]] |
| // DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z8test_onePf( |
| // OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] |
| // OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: ret void |
| // |
| __device__ __host__ void test_one(float *a) { |
| [[clang::atomic(no_remote_memory)]] { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| } |
| |
| // HOST-LABEL: define dso_local void @_Z8test_twoPf( |
| // HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 |
| // HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: ret void |
| // |
| // DEV-LABEL: define dso_local void @_Z8test_twoPf( |
| // DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // DEV-NEXT: [[ENTRY:.*:]] |
| // DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] |
| // DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z8test_twoPf( |
| // OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]] |
| // OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: ret void |
| // |
| __device__ __host__ void test_two(float *a) { |
| [[clang::atomic(remote_memory, ignore_denormal_mode)]] { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| } |
| |
| // HOST-LABEL: define dso_local void @_Z10test_threePf( |
| // HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 |
| // HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: ret void |
| // |
| // DEV-LABEL: define dso_local void @_Z10test_threePf( |
| // DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // DEV-NEXT: [[ENTRY:.*:]] |
| // DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]] |
| // DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z10test_threePf( |
| // OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]] |
| // OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: ret void |
| // |
| __device__ __host__ void test_three(float *a) { |
| [[clang::atomic(no_remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| } |
| |
| // HOST-LABEL: define dso_local void @_Z19test_multiple_attrsPf( |
| // HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 |
| // HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: ret void |
| // |
| // DEV-LABEL: define dso_local void @_Z19test_multiple_attrsPf( |
| // DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // DEV-NEXT: [[ENTRY:.*:]] |
| // DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]] |
| // DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z19test_multiple_attrsPf( |
| // OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]] |
| // OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: ret void |
| // |
| __device__ __host__ void test_multiple_attrs(float *a) { |
| [[clang::atomic(no_remote_memory)]] [[clang::atomic(remote_memory)]] { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| } |
| |
| // HOST-LABEL: define dso_local void @_Z11test_nestedPf( |
| // HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // HOST-NEXT: [[ENTRY:.*:]] |
| // HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4 |
| // HOST-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4 |
| // HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4 |
| // HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4 |
| // HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4 |
| // HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1]], align 4 |
| // HOST-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1]], align 4 |
| // HOST-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] seq_cst, align 4 |
| // HOST-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2]], align 4 |
| // HOST-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2]], align 4 |
| // HOST-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3]], align 4 |
| // HOST-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3]], align 4 |
| // HOST-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] acquire, align 4 |
| // HOST-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4]], align 4 |
| // HOST-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4]], align 4 |
| // HOST-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // HOST-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5]], align 4 |
| // HOST-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5]], align 4 |
| // HOST-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] release, align 4 |
| // HOST-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6]], align 4 |
| // HOST-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6]], align 4 |
| // HOST-NEXT: ret void |
| // |
| // DEV-LABEL: define dso_local void @_Z11test_nestedPf( |
| // DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // DEV-NEXT: [[ENTRY:.*:]] |
| // DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5) |
| // DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // DEV-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // DEV-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4 |
| // DEV-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]] |
| // DEV-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // DEV-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] |
| // DEV-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // DEV-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // DEV-NEXT: ret void |
| // |
| // OPT-LABEL: define dso_local void @_Z11test_nestedPf( |
| // OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] { |
| // OPT-NEXT: [[ENTRY:.*:]] |
| // OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5) |
| // OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // OPT-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // OPT-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]] |
| // OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4 |
| // OPT-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]] |
| // OPT-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // OPT-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] |
| // OPT-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // OPT-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // OPT-NEXT: ret void |
| // |
| __device__ __host__ void test_nested(float *a) { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| { |
| [[clang::atomic(remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] { |
| __scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE); |
| { |
| [[clang::atomic(no_remote_memory)]] { |
| __scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP); |
| } |
| } |
| { |
| [[clang::atomic(no_fine_grained_memory)]] { |
| __scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT); |
| } |
| } |
| } |
| } |
| } |
| |
| // |
| // |
| // |
| // |
| template<typename T> __device__ __host__ void test_template(T *a) { |
| [[clang::atomic(no_remote_memory, fine_grained_memory)]] { |
| __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| } |
| |
| template __device__ __host__ void test_template<float>(float *a); |
| |
| //. |
| // DEV: [[META4]] = !{} |
| //. |
| // OPT: [[META4]] = !{} |
| //. |