| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX803 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX906 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX908 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX90A %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX942 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX10 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX11 %s |
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes='require<libcall-lowering-info>,atomic-expand' %s | FileCheck -check-prefixes=COMMON,GFX12 %s |
| |
| ;--------------------------------------------------------------------- |
| ; atomicrmw xchg |
| ;--------------------------------------------------------------------- |
| |
| ; xchg is supported over PCIe, so no expansion is necessary. |
| ; Every RUN line shares the COMMON checks here, which expect the atomicrmw xchg |
| ; to remain in place rather than being rewritten into a cmpxchg loop. |
| define double @test_atomicrmw_xchg_f64_global_agent(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst |
| ret double %res |
| } |
| |
| ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored. |
| ; The shared COMMON checks expect the atomicrmw xchg to stay in place on every |
| ; RUN line, still carrying its !amdgpu.no.fine.grained.memory annotation. |
| define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]] |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret double %res |
| } |
| |
| ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored. |
| ; The shared COMMON checks expect the atomicrmw xchg to stay in place on every |
| ; RUN line, still carrying its !amdgpu.no.remote.memory annotation. |
| define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; xchg is supported over PCIe, so no expansion is necessary. Metadata should be ignored. |
| ; The shared COMMON checks expect the atomicrmw xchg to stay in place on every |
| ; RUN line, still carrying both the !amdgpu.no.fine.grained.memory and the |
| ; !amdgpu.no.remote.memory annotations. |
| define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ;--------------------------------------------------------------------- |
| ; atomicrmw fadd |
| ;--------------------------------------------------------------------- |
| |
| ; fadd on double in the global address space at agent scope, with no |
| ; memory-property metadata attached. Per the checks below, only the gfx942 run |
| ; keeps the atomicrmw fadd; every other run expects an initial load followed by |
| ; a cmpxchg retry loop operating on the i64 bit pattern of the value. |
| define double @test_atomicrmw_fadd_f64_global_agent(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst |
| ret double %res |
| } |
| |
| ; fadd on double in the global address space at agent scope, annotated with |
| ; !amdgpu.no.fine.grained.memory. Per the checks below, the gfx90a and gfx942 |
| ; runs keep the atomicrmw fadd; the remaining runs expect a cmpxchg retry loop, |
| ; with the metadata carried over onto the cmpxchg instruction. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret double %res |
| } |
| |
| ; fadd on double in the global address space at agent scope, annotated with |
| ; !amdgpu.no.remote.memory only. Per the checks below, only the gfx942 run keeps |
| ; the atomicrmw fadd; every other run (gfx90a included) expects a cmpxchg retry |
| ; loop, with the metadata carried over onto the cmpxchg instruction. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; Atomic fadd of a double in global memory (addrspace 1) at agent scope, seq_cst, |
| ; tagged with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. |
| ; Per the assertions below, the gfx90a and gfx942 runs leave the atomicrmw in |
| ; place, while the gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 runs |
| ; expect an initial load followed by a cmpxchg retry loop on the i64 bit pattern. |
| ; In both shapes the two metadata attachments are expected on the resulting |
| ; atomic instruction, and the alignment is printed explicitly as 8. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; Atomic fadd of a double in global memory (addrspace 1) at agent scope, seq_cst, |
| ; tagged with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, |
| ; in a function that carries attribute group #0. |
| ; NOTE(review): #0 is defined outside this part of the file; going by the |
| ; function name it presumably selects a denormals-are-zero f64 mode - confirm. |
| ; Per the assertions below, the gfx90a and gfx942 runs leave the atomicrmw in |
| ; place, while the gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 runs |
| ; expect an initial load followed by a cmpxchg retry loop on the i64 bit pattern, |
| ; with both metadata attachments kept on the cmpxchg. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz(ptr addrspace(1) %ptr, double %value) #0 { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_daz( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; Atomic fadd of a double in global memory (addrspace 1) at agent scope, seq_cst, |
| ; tagged with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory, |
| ; in a function that carries attribute group #1. |
| ; NOTE(review): #1 is defined outside this part of the file; going by the |
| ; function name it presumably selects a dynamic f64 denormal mode - confirm. |
| ; Per the assertions below, the gfx90a and gfx942 runs leave the atomicrmw in |
| ; place, while the gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 runs |
| ; expect an initial load followed by a cmpxchg retry loop on the i64 bit pattern, |
| ; with both metadata attachments kept on the cmpxchg. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic(ptr addrspace(1) %ptr, double %value) #1 { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory___denormal_fp_mode_f64_dynamic( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2:[0-9]+]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; With !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode, the checks above expect gfx90a and gfx942 to keep the f64 atomicrmw fadd as is, and every other tested -mcpu to get a cmpxchg loop whose cmpxchg retains only !amdgpu.no.fine.grained.memory. |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX90A-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; With !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode, the checks above expect only gfx942 to keep the f64 atomicrmw fadd as is; gfx90a and every other tested -mcpu get a cmpxchg loop whose cmpxchg retains only !amdgpu.no.remote.memory. |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; With all three of !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode, the checks above expect gfx90a and gfx942 to keep the f64 atomicrmw fadd as is; every other tested -mcpu gets a cmpxchg loop whose cmpxchg retains the two memory annotations but not !amdgpu.ignore.denormal.mode. |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; fadd f64 at agent scope carrying all three amdgpu.* metadata, in a function that uses attribute group #0 (presumably the DAZ denormal mode, going by the test name; the attribute definition is outside this chunk). |
| ; The checks expect gfx90a and gfx942 to keep the atomicrmw unchanged; gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 expect a cmpxchg loop that keeps only the no.fine.grained.memory and no.remote.memory metadata. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz(ptr addrspace(1) %ptr, double %value) #0 { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_daz( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR1]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; fadd f64 at agent scope carrying all three amdgpu.* metadata, in a function that uses attribute group #1 (presumably the dynamic denormal mode, going by the test name; the attribute definition is outside this chunk). |
| ; The checks expect gfx90a and gfx942 to keep the atomicrmw unchanged; gfx803, gfx906, gfx908, gfx1030, gfx1100 and gfx1200 expect a cmpxchg loop that keeps only the no.fine.grained.memory and no.remote.memory metadata. |
| define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic(ptr addrspace(1) %ptr, double %value) #1 { |
| ; GFX803-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX803-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX803-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX906-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX906-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX908-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX908-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX10-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX10-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX11-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX11-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP5]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory__denormal_mode_dynamic( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR2]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[NEW:%.*]] = fadd double [[LOADED]], [[VALUE]] |
| ; GFX12-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; GFX12-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fadd ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ;--------------------------------------------------------------------- |
| ; atomicrmw fsub |
| ;--------------------------------------------------------------------- |
| |
| ; Baseline fsub f64 at agent scope with no metadata and no explicit alignment. |
| ; The shared check prefix means every -mcpu in the RUN lines expects the same cmpxchg loop, with align 8 on the cmpxchg. |
| define double @test_atomicrmw_fsub_f64_global_agent(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with only !amdgpu.no.fine.grained.memory. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop; the checks require the metadata to be carried onto the cmpxchg. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with only !amdgpu.no.remote.memory. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop; the checks require the metadata to be carried onto the cmpxchg. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_remote_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with both !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop; the checks require both metadata to be carried onto the cmpxchg. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[RES]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with only !amdgpu.ignore.denormal.mode. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop, and the expected cmpxchg carries no metadata. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop; the expected cmpxchg keeps only no.fine.grained.memory. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop; the expected cmpxchg keeps only no.remote.memory. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; fsub f64 at agent scope with all three amdgpu.* metadata. |
| ; Every -mcpu in the RUN lines still expects a cmpxchg loop; the expected cmpxchg keeps no.fine.grained.memory and no.remote.memory but not ignore.denormal.mode. |
| define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; COMMON-LABEL: define double @test_atomicrmw_fsub_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; COMMON-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; COMMON-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; COMMON: atomicrmw.start: |
| ; COMMON-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] |
| ; COMMON-NEXT: [[NEW:%.*]] = fsub double [[LOADED]], [[VALUE]] |
| ; COMMON-NEXT: [[TMP2:%.*]] = bitcast double [[NEW]] to i64 |
| ; COMMON-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64 |
| ; COMMON-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; COMMON-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 |
| ; COMMON-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 |
| ; COMMON-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double |
| ; COMMON-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; COMMON: atomicrmw.end: |
| ; COMMON-NEXT: ret double [[TMP5]] |
| ; |
| %res = atomicrmw fsub ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ;--------------------------------------------------------------------- |
| ; atomicrmw fmax |
| ;--------------------------------------------------------------------- |
| |
| ; Baseline: agent-scope seq_cst fmax on a global (addrspace(1)) double with no |
| ; !amdgpu.* metadata attached. The checks below expect the atomicrmw to be left |
| ; untouched only under the GFX942 prefix; every other prefix expects it to be |
| ; rewritten as a load + llvm.maxnum.f64 + i64 cmpxchg retry loop. |
| define double @test_atomicrmw_fmax_f64_global_agent(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst |
| ret double %res |
| } |
| |
| ; Same fmax as the unannotated test, but tagged with |
| ; !amdgpu.no.fine.grained.memory. The checks expect the atomicrmw to survive |
| ; unchanged under the GFX90A, GFX942 and GFX10 prefixes, and to be expanded |
| ; into a cmpxchg loop under GFX803, GFX906, GFX908, GFX11 and GFX12. In the |
| ; expanded form the metadata is expected on the cmpxchg instruction. |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret double %res |
| } |
| |
| ; Same fmax, tagged only with !amdgpu.no.remote.memory. The checks expect this |
| ; metadata alone to leave the atomicrmw intact only under the GFX942 prefix; |
| ; every other prefix expects the cmpxchg loop, with the metadata carried over |
| ; onto the cmpxchg. Under GFX90A and GFX10 the loop's final bitcast is captured |
| ; as RES rather than TMP6, but the expected instruction sequence is the same. |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; Same fmax, tagged with both !amdgpu.no.fine.grained.memory and |
| ; !amdgpu.no.remote.memory. The checks expect the atomicrmw to be kept under |
| ; the GFX90A, GFX942 and GFX10 prefixes, and expanded into a cmpxchg loop under |
| ; GFX803, GFX906, GFX908, GFX11 and GFX12. Both metadata kinds are expected on |
| ; whichever atomic instruction remains in the output. |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; fmax f64, global address space, agent scope, annotated only with |
| ; !amdgpu.ignore.denormal.mode. Per the checks below, the gfx942 run keeps the |
| ; atomicrmw unchanged, while every other run rewrites it into a load followed |
| ; by a cmpxchg loop around llvm.maxnum.f64; the emitted cmpxchg carries no |
| ; metadata. |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
|   %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0 |
|   ret double %res |
| } |
| |
| ; fmax f64, global address space, agent scope, annotated with both |
| ; !amdgpu.ignore.denormal.mode and !amdgpu.no.fine.grained.memory. Per the |
| ; checks below, the gfx90a, gfx942 and gfx1030 runs keep the atomicrmw with |
| ; both annotations, while the gfx803, gfx906, gfx908, gfx1100 and gfx1200 runs |
| ; expand it into a cmpxchg loop that retains only |
| ; !amdgpu.no.fine.grained.memory. |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
|   %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 |
|   ret double %res |
| } |
| |
| ; fmax f64, global address space, agent scope, annotated with both |
| ; !amdgpu.ignore.denormal.mode and !amdgpu.no.remote.memory. Per the checks |
| ; below, only the gfx942 run keeps the atomicrmw (with both annotations); all |
| ; other runs expand it into a cmpxchg loop that retains only |
| ; !amdgpu.no.remote.memory. |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
|   %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
|   ret double %res |
| } |
| |
| define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmax_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.maxnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmax ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ;--------------------------------------------------------------------- |
| ; atomicrmw fmin |
| ;--------------------------------------------------------------------- |
| |
| ; Agent-scope f64 fmin on a global (addrspace 1) pointer with no !amdgpu.* |
| ; metadata attached. Per the checks below, only the gfx942 run keeps the |
| ; atomicrmw as-is; every other tested target rewrites it into a cmpxchg loop |
| ; built around llvm.minnum.f64. The input omits an explicit alignment, and |
| ; the checked output carries align 8. |
| define double @test_atomicrmw_fmin_f64_global_agent(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8 |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst |
| ret double %res |
| } |
| |
| ; Agent-scope f64 fmin on a global pointer, tagged with |
| ; !amdgpu.no.fine.grained.memory only. Per the checks below, the gfx90a, |
| ; gfx942 and gfx1030 runs keep the atomicrmw (metadata intact), while the |
| ; gfx803, gfx906, gfx908, gfx1100 and gfx1200 runs expand it into a cmpxchg |
| ; loop around llvm.minnum.f64 with the metadata carried over onto the cmpxchg. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 |
| ret double %res |
| } |
| |
| ; Agent-scope f64 fmin on a global pointer, tagged with |
| ; !amdgpu.no.remote.memory only. Per the checks below, the gfx942 run alone |
| ; keeps the atomicrmw (metadata intact); every other tested target expands it |
| ; into a cmpxchg loop around llvm.minnum.f64 with the metadata carried over |
| ; onto the cmpxchg. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[RES:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[RES]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; f64 fmin on a global (addrspace(1)) pointer at agent scope, tagged with both |
| ; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory. |
| ; Per the checks below, gfx90a, gfx942 and gfx1030 leave the atomicrmw in place, |
| ; while gfx803, gfx906, gfx908, gfx1100 and gfx1200 get a cmpxchg loop built |
| ; around llvm.minnum.f64, with both metadata nodes carried over to the cmpxchg. |
| ; The input atomicrmw has no explicit alignment; the checked output shows align 8. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 |
| ret double %res |
| } |
| |
| ; f64 fmin on a global (addrspace(1)) pointer at agent scope, tagged only with |
| ; !amdgpu.ignore.denormal.mode. |
| ; Per the checks below, only gfx942 leaves the atomicrmw in place; every other |
| ; tested target gets a cmpxchg loop built around llvm.minnum.f64, and the |
| ; cmpxchg in that loop carries no metadata. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; f64 fmin on a global (addrspace(1)) pointer at agent scope, tagged with |
| ; !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode. |
| ; Per the checks below, gfx90a, gfx942 and gfx1030 leave the atomicrmw in place |
| ; with both metadata nodes; gfx803, gfx906, gfx908, gfx1100 and gfx1200 get a |
| ; cmpxchg loop whose cmpxchg carries only !amdgpu.no.fine.grained.memory. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; f64 fmin on a global (addrspace(1)) pointer at agent scope, tagged with |
| ; !amdgpu.no.remote.memory and !amdgpu.ignore.denormal.mode. |
| ; Per the checks below, only gfx942 leaves the atomicrmw in place (with both |
| ; metadata nodes); every other tested target gets a cmpxchg loop whose cmpxchg |
| ; carries only !amdgpu.no.remote.memory. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX90A: atomicrmw.start: |
| ; GFX90A-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX90A-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX90A-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX90A-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX90A-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX90A: atomicrmw.end: |
| ; GFX90A-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX10-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX10: atomicrmw.start: |
| ; GFX10-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX10-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX10-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX10-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX10-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX10-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX10-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX10-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX10-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX10: atomicrmw.end: |
| ; GFX10-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; fmin on a global (addrspace(1)) double at agent scope, seq_cst, annotated with |
| ; all three of !amdgpu.no.fine.grained.memory, !amdgpu.no.remote.memory and |
| ; !amdgpu.ignore.denormal.mode. Per the checks below, gfx90a, gfx942 and gfx1030 |
| ; leave the atomicrmw untouched, while gfx803, gfx906, gfx908, gfx1100 and |
| ; gfx1200 expand it into a cmpxchg loop around llvm.minnum.f64. The expanded |
| ; cmpxchg carries the no.fine.grained.memory and no.remote.memory annotations |
| ; but not ignore.denormal.mode. |
| define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) { |
| ; GFX803-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX803-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX803-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX803-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX803: atomicrmw.start: |
| ; GFX803-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX803-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX803-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX803-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX803-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX803-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX803-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX803-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX803-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX803: atomicrmw.end: |
| ; GFX803-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX906-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX906-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX906-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX906-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX906: atomicrmw.start: |
| ; GFX906-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX906-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX906-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX906-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX906-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX906-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX906-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX906-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX906-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX906: atomicrmw.end: |
| ; GFX906-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX908-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX908-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX908-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX908: atomicrmw.start: |
| ; GFX908-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX908-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX908-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX908-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX908-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX908: atomicrmw.end: |
| ; GFX908-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX90A-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX90A-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX90A-NEXT: ret double [[RES]] |
| ; |
| ; GFX942-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX942-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX942-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX942-NEXT: ret double [[RES]] |
| ; |
| ; GFX10-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX10-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX10-NEXT: [[RES:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]] |
| ; GFX10-NEXT: ret double [[RES]] |
| ; |
| ; GFX11-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX11-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX11-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX11: atomicrmw.start: |
| ; GFX11-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX11-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX11-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX11-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX11-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX11: atomicrmw.end: |
| ; GFX11-NEXT: ret double [[TMP6]] |
| ; |
| ; GFX12-LABEL: define double @test_atomicrmw_fmin_f64_global_agent__amdgpu_ignore_denormal_mode__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory( |
| ; GFX12-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] { |
| ; GFX12-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(1) [[PTR]], align 8 |
| ; GFX12-NEXT: br label [[ATOMICRMW_START:%.*]] |
| ; GFX12: atomicrmw.start: |
| ; GFX12-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] |
| ; GFX12-NEXT: [[TMP2:%.*]] = call double @llvm.minnum.f64(double [[LOADED]], double [[VALUE]]) |
| ; GFX12-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64 |
| ; GFX12-NEXT: [[TMP4:%.*]] = bitcast double [[LOADED]] to i64 |
| ; GFX12-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]] |
| ; GFX12-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 |
| ; GFX12-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 |
| ; GFX12-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to double |
| ; GFX12-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] |
| ; GFX12: atomicrmw.end: |
| ; GFX12-NEXT: ret double [[TMP6]] |
| ; |
| %res = atomicrmw fmin ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0, !amdgpu.ignore.denormal.mode !0 |
| ret double %res |
| } |
| |
| ; Attribute groups selecting a "denormal-fp-mode": #0 sets "preserve-sign" for |
| ; both components and #1 sets "dynamic" for both components. |
| attributes #0 = { "denormal-fp-mode"="preserve-sign,preserve-sign" } |
| attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" } |
| |
| ; Empty metadata node; the atomicrmw instructions in this file use it as the |
| ; operand of their !amdgpu.* annotations. |
| !0 = !{} |
| ;. |
| ; GFX803: [[META0]] = !{} |
| ;. |
| ; GFX906: [[META0]] = !{} |
| ;. |
| ; GFX908: [[META0]] = !{} |
| ;. |
| ; GFX90A: [[META0]] = !{} |
| ;. |
| ; GFX942: [[META0]] = !{} |
| ;. |
| ; GFX10: [[META0]] = !{} |
| ;. |
| ; GFX11: [[META0]] = !{} |
| ;. |
| ; GFX12: [[META0]] = !{} |
| ;. |