blob: dbc8c3175cbc2377942fc7fd9d4f5e0b7a2b8d47 [file] [log] [blame]
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=DEV %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fatomic-fine-grained-memory -fatomic-ignore-denormal-mode \
// RUN: -emit-llvm -o - -fcuda-is-device %s | FileCheck --check-prefix=OPT %s
#include "Inputs/cuda.h"
// HOST-LABEL: define dso_local void @_Z12test_defaultPf(
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z12test_defaultPf(
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]]
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z12test_defaultPf(
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4:![0-9]+]], !amdgpu.ignore.denormal.mode [[META4]]
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: ret void
//
__device__ __host__ void test_default(float *a) {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
// HOST-LABEL: define dso_local void @_Z8test_onePf(
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z8test_onePf(
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z8test_onePf(
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: ret void
//
__device__ __host__ void test_one(float *a) {
[[clang::atomic(no_remote_memory)]] {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
}
// HOST-LABEL: define dso_local void @_Z8test_twoPf(
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z8test_twoPf(
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z8test_twoPf(
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]]
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: ret void
//
__device__ __host__ void test_two(float *a) {
[[clang::atomic(remote_memory, ignore_denormal_mode)]] {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
}
// HOST-LABEL: define dso_local void @_Z10test_threePf(
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z10test_threePf(
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]]
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z10test_threePf(
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]]
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: ret void
//
__device__ __host__ void test_three(float *a) {
[[clang::atomic(no_remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
}
// HOST-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z19test_multiple_attrsPf(
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.ignore.denormal.mode [[META4]]
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: ret void
//
__device__ __host__ void test_multiple_attrs(float *a) {
[[clang::atomic(no_remote_memory)]] [[clang::atomic(remote_memory)]] {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
}
// HOST-LABEL: define dso_local void @_Z11test_nestedPf(
// HOST-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// HOST-NEXT: [[ENTRY:.*:]]
// HOST-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// HOST-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4
// HOST-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4
// HOST-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4
// HOST-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4
// HOST-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4
// HOST-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP]], align 4
// HOST-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4
// HOST-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP]], align 4
// HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1]], align 4
// HOST-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1]], align 4
// HOST-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] seq_cst, align 4
// HOST-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2]], align 4
// HOST-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2]], align 4
// HOST-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3]], align 4
// HOST-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3]], align 4
// HOST-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] acquire, align 4
// HOST-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4]], align 4
// HOST-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4]], align 4
// HOST-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8
// HOST-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5]], align 4
// HOST-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5]], align 4
// HOST-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] release, align 4
// HOST-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6]], align 4
// HOST-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6]], align 4
// HOST-NEXT: ret void
//
// DEV-LABEL: define dso_local void @_Z11test_nestedPf(
// DEV-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// DEV-NEXT: [[ENTRY:.*:]]
// DEV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5)
// DEV-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// DEV-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// DEV-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
// DEV-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
// DEV-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
// DEV-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr
// DEV-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// DEV-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]]
// DEV-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// DEV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// DEV-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// DEV-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4
// DEV-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// DEV-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// DEV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4
// DEV-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4
// DEV-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]]
// DEV-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
// DEV-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
// DEV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// DEV-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4
// DEV-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4
// DEV-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// DEV-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4
// DEV-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4
// DEV-NEXT: ret void
//
// OPT-LABEL: define dso_local void @_Z11test_nestedPf(
// OPT-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// OPT-NEXT: [[ENTRY:.*:]]
// OPT-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP1:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP3:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[DOTATOMICTMP5:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca float, align 4, addrspace(5)
// OPT-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// OPT-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// OPT-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
// OPT-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
// OPT-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
// OPT-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr
// OPT-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 1.000000e+00, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP1:%.*]] = load float, ptr [[DOTATOMICTMP_ASCAST]], align 4
// OPT-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP1]] monotonic, align 4, !amdgpu.no.remote.memory [[META4]], !amdgpu.ignore.denormal.mode [[META4]]
// OPT-NEXT: store float [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// OPT-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 2.000000e+00, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// OPT-NEXT: [[TMP5:%.*]] = load float, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// OPT-NEXT: [[TMP6:%.*]] = atomicrmw fmax ptr [[TMP4]], float [[TMP5]] syncscope("agent") seq_cst, align 4
// OPT-NEXT: store float [[TMP6]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// OPT-NEXT: [[TMP7:%.*]] = load float, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// OPT-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 3.000000e+00, ptr [[DOTATOMICTMP3_ASCAST]], align 4
// OPT-NEXT: [[TMP9:%.*]] = load float, ptr [[DOTATOMICTMP3_ASCAST]], align 4
// OPT-NEXT: [[TMP10:%.*]] = atomicrmw fmin ptr [[TMP8]], float [[TMP9]] syncscope("workgroup") acquire, align 4, !amdgpu.no.remote.memory [[META4]]
// OPT-NEXT: store float [[TMP10]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
// OPT-NEXT: [[TMP11:%.*]] = load float, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
// OPT-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8
// OPT-NEXT: store float 4.000000e+00, ptr [[DOTATOMICTMP5_ASCAST]], align 4
// OPT-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTATOMICTMP5_ASCAST]], align 4
// OPT-NEXT: [[TMP14:%.*]] = atomicrmw fsub ptr [[TMP12]], float [[TMP13]] syncscope("wavefront") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// OPT-NEXT: store float [[TMP14]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4
// OPT-NEXT: [[TMP15:%.*]] = load float, ptr [[ATOMIC_TEMP6_ASCAST]], align 4
// OPT-NEXT: ret void
//
__device__ __host__ void test_nested(float *a) {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
{
[[clang::atomic(remote_memory, fine_grained_memory, no_ignore_denormal_mode)]] {
__scoped_atomic_fetch_max(a, 2, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
{
[[clang::atomic(no_remote_memory)]] {
__scoped_atomic_fetch_min(a, 3, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP);
}
}
{
[[clang::atomic(no_fine_grained_memory)]] {
__scoped_atomic_fetch_sub(a, 4, __ATOMIC_RELEASE, __MEMORY_SCOPE_WVFRNT);
}
}
}
}
}
//
//
//
//
template<typename T> __device__ __host__ void test_template(T *a) {
[[clang::atomic(no_remote_memory, fine_grained_memory)]] {
__scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
}
template __device__ __host__ void test_template<float>(float *a);
//.
// DEV: [[META4]] = !{}
//.
// OPT: [[META4]] = !{}
//.