blob: 1411890e01dc59a09f2d91ee31c2df281515ac14 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx900 %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx90a %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx940 %s | FileCheck %s
;---------------------------------------------------------------------
; atomicrmw fadd
;---------------------------------------------------------------------
define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <2 x bfloat> %res
}
define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x half> [[NEW]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[LOADED]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <2 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[NEW]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[NEW]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
ret <4 x bfloat> %res
}
define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
; CHECK-LABEL: define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x float> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[NEW]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <2 x float>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
%res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
ret <2 x float> %res
}
;---------------------------------------------------------------------
; atomicrmw fsub
;---------------------------------------------------------------------
define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <2 x bfloat> %res
}
define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x half> [[NEW]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[LOADED]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[TMP5]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[TMP5]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <2 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[NEW]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[TMP5]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[NEW]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[TMP5]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
ret <4 x bfloat> %res
}
define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
; CHECK-LABEL: define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x float> [[LOADED]], [[VALUE]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[NEW]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <2 x float>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x float> [[TMP5]]
;
%res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
ret <2 x float> %res
}
;---------------------------------------------------------------------
; atomicrmw fmin
;---------------------------------------------------------------------
define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <2 x bfloat> %res
}
define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[TMP6]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[TMP6]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <2 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LOADED]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[TMP6]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[TMP6]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
ret <4 x bfloat> %res
}
define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
; CHECK-LABEL: define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> [[LOADED]], <2 x float> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LOADED]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <2 x float>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x float> [[TMP6]]
;
%res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
ret <2 x float> %res
}
;---------------------------------------------------------------------
; atomicrmw fmax
;---------------------------------------------------------------------
define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[NEWLOADED]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <2 x bfloat> %res
}
define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) {
; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x half> [[TMP6]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4
ret <2 x half> %res
}
define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) {
; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x bfloat> [[TMP6]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <2 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[NEWLOADED]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5)
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5)
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]])
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0
; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1
; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4
ret <4 x bfloat> %res
}
define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) {
; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LOADED]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x half>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x half> [[TMP6]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8
ret <4 x half> %res
}
define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) {
; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <4 x bfloat> [[TMP6]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8
ret <4 x bfloat> %res
}
define <2 x float> @test_atomicrmw_fmax_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) {
; CHECK-LABEL: define <2 x float> @test_atomicrmw_fmax_v2f32_global_agent_align8(
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[LOADED]], <2 x float> [[VALUE]])
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LOADED]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <2 x float>
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret <2 x float> [[TMP6]]
;
%res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8
ret <2 x float> %res
}