blob: 86e3d9338e07803e7dd0d204f4e6f9ba848bb78a [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
declare i32 @llvm.amdgcn.workitem.id.x()
define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP14]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: ret void
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_value(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP: 12:
; IR-DPP-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP14]]
; IR-DPP: 14:
; IR-DPP-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
ret void
}
define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_value(
; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 8:
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP16:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: ret void
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP16]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP19:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = bitcast float [[DIVVALUE]] to i32
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP13]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float
; IR-ITERATIVE-NEXT: [[TMP16]] = fadd float [[ACCUMULATOR]], [[TMP15]]
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], -1
; IR-ITERATIVE-NEXT: [[TMP19]] = and i64 [[ACTIVEBITS]], [[TMP18]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP20]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fadd_div_value(
; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = bitcast float [[DIVVALUE]] to i32
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP7]], i32 -2147483648)
; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float
; IR-DPP-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP12:%.*]] = fadd float [[TMP9]], [[TMP11]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP12]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP12]], [[TMP13]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]]
; IR-DPP-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32
; IR-DPP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP23]], i32 63)
; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast i32 [[TMP24]] to float
; IR-DPP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]])
; IR-DPP-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP30:%.*]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP26]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP30]]
; IR-DPP: 30:
; IR-DPP-NEXT: ret void
;
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
%divValue = bitcast i32 %id.x to float
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue seq_cst
ret void
}
define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_value(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP14]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: ret void
;
; IR-DPP-LABEL: @global_atomic_fsub_uni_value(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]])
; IR-DPP-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
; IR-DPP-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float
; IR-DPP-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]]
; IR-DPP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]]
; IR-DPP: 12:
; IR-DPP-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP14]]
; IR-DPP: 14:
; IR-DPP-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst
ret void
}
define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_value(
; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 8:
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP16:%.*]] seq_cst, align 4
; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: ret void
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP16]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP19:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = bitcast float [[DIVVALUE]] to i32
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP13]], i32 [[TMP12]])
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float
; IR-ITERATIVE-NEXT: [[TMP16]] = fadd float [[ACCUMULATOR]], [[TMP15]]
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = shl i64 1, [[TMP11]]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], -1
; IR-ITERATIVE-NEXT: [[TMP19]] = and i64 [[ACTIVEBITS]], [[TMP18]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP20]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[TMP8:%.*]], label [[TMP10]]
;
; IR-DPP-LABEL: @global_atomic_fsub_div_value(
; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; IR-DPP-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float
; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0)
; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]])
; IR-DPP-NEXT: [[TMP7:%.*]] = bitcast float [[DIVVALUE]] to i32
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP7]], i32 -2147483648)
; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float
; IR-DPP-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP12:%.*]] = fadd float [[TMP9]], [[TMP11]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP12]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP12]], [[TMP13]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]]
; IR-DPP-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32
; IR-DPP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP23]], i32 63)
; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast i32 [[TMP24]] to float
; IR-DPP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]])
; IR-DPP-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP6]], 0
; IR-DPP-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP30:%.*]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP26]] seq_cst, align 4
; IR-DPP-NEXT: br label [[TMP30]]
; IR-DPP: 30:
; IR-DPP-NEXT: ret void
;
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
%divValue = bitcast i32 %id.x to float
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue seq_cst
ret void
}
attributes #0 = {"target-cpu"="gfx906"}