| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s |
| ; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s |
| ; Intrinsic called by the *_div_value kernels below; its i32 result is bitcast to float and used as the atomic operand. |
| declare i32 @llvm.amdgcn.workitem.id.x() |
| ; fadd with a constant operand (4.0) and an unused result. |
| ; For both strategies the expected output is identical: the constant is multiplied by |
| ; ctpop(ballot(true)) converted to float, and a single atomicrmw fadd of that product is |
| ; executed only in the block guarded by "mbcnt result == 0". |
| define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 { |
| ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value( |
| ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]) |
| ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float |
| ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]] |
| ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-ITERATIVE-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]] |
| ; IR-ITERATIVE: 12: |
| ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4 |
| ; IR-ITERATIVE-NEXT: br label [[TMP14]] |
| ; IR-ITERATIVE: 14: |
| ; IR-ITERATIVE-NEXT: ret void |
| ; |
| ; IR-DPP-LABEL: @global_atomic_fadd_uni_value( |
| ; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-DPP-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]) |
| ; IR-DPP-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 |
| ; IR-DPP-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float |
| ; IR-DPP-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]] |
| ; IR-DPP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-DPP-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]] |
| ; IR-DPP: 12: |
| ; IR-DPP-NEXT: [[TMP13:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4 |
| ; IR-DPP-NEXT: br label [[TMP14]] |
| ; IR-DPP: 14: |
| ; IR-DPP-NEXT: ret void |
| ; |
| ; Input: the atomic operand is the literal 4.0; %result has no users. |
| %result = atomicrmw fadd ptr addrspace(1) %ptr, float 4.0 seq_cst |
| ret void |
| } |
| |
| |
| ; fadd whose operand is derived from llvm.amdgcn.workitem.id.x (bitcast to float); the result is unused. |
| ; Expected output, iterative strategy: a ComputeLoop that picks one set bit of the ballot per |
| ; iteration (cttz), reads that lane's value with readlane, and accumulates it with fadd starting |
| ; from -0.0; after ComputeEnd a single atomicrmw fadd runs in the "mbcnt result == 0" block. |
| ; Expected output, dpp strategy: set.inactive with i32 -2147483648 (the bit pattern of -0.0), |
| ; a chain of update.dpp + fadd steps, readlane of lane 63, strict.wwm, then a single |
| ; atomicrmw fadd in the "mbcnt result == 0" block. |
| define amdgpu_kernel void @global_atomic_fadd_div_value(ptr addrspace(1) %ptr) #0 { |
| ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_value( |
| ; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float |
| ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] |
| ; IR-ITERATIVE: 8: |
| ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP16:%.*]] seq_cst, align 4 |
| ; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]] |
| ; IR-ITERATIVE: 10: |
| ; IR-ITERATIVE-NEXT: ret void |
| ; IR-ITERATIVE: ComputeLoop: |
| ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP16]], [[COMPUTELOOP]] ] |
| ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP19:%.*]], [[COMPUTELOOP]] ] |
| ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) |
| ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = bitcast float [[DIVVALUE]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP13]], i32 [[TMP12]]) |
| ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float |
| ; IR-ITERATIVE-NEXT: [[TMP16]] = fadd float [[ACCUMULATOR]], [[TMP15]] |
| ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = shl i64 1, [[TMP11]] |
| ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], -1 |
| ; IR-ITERATIVE-NEXT: [[TMP19]] = and i64 [[ACTIVEBITS]], [[TMP18]] |
| ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 0 |
| ; IR-ITERATIVE-NEXT: br i1 [[TMP20]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] |
| ; IR-ITERATIVE: ComputeEnd: |
| ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[TMP8:%.*]], label [[TMP10]] |
| ; |
| ; IR-DPP-LABEL: @global_atomic_fadd_div_value( |
| ; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; IR-DPP-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float |
| ; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-DPP-NEXT: [[TMP7:%.*]] = bitcast float [[DIVVALUE]] to i32 |
| ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP7]], i32 -2147483648) |
| ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float |
| ; IR-DPP-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float |
| ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP12:%.*]] = fadd float [[TMP9]], [[TMP11]] |
| ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP12]], i32 274, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP12]], [[TMP13]] |
| ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 276, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] |
| ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 280, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] |
| ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 322, i32 10, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] |
| ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 323, i32 12, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] |
| ; IR-DPP-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32 |
| ; IR-DPP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP23]], i32 63) |
| ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast i32 [[TMP24]] to float |
| ; IR-DPP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) |
| ; IR-DPP-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-DPP-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP30:%.*]] |
| ; IR-DPP: 28: |
| ; IR-DPP-NEXT: [[TMP29:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP26]] seq_cst, align 4 |
| ; IR-DPP-NEXT: br label [[TMP30]] |
| ; IR-DPP: 30: |
| ; IR-DPP-NEXT: ret void |
| ; |
| ; Input: the atomic operand is the workitem id reinterpreted as a float; %result has no users. |
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() |
| %divValue = bitcast i32 %id.x to float |
| %result = atomicrmw fadd ptr addrspace(1) %ptr, float %divValue seq_cst |
| ret void |
| } |
| |
| ; fsub with a constant operand (4.0) and an unused result. |
| ; This kernel must exercise atomicrmw fsub (not fadd) so that it is not a duplicate of |
| ; @global_atomic_fadd_uni_value. For both strategies the expected output multiplies the |
| ; constant by ctpop(ballot(true)) converted to float and executes a single atomicrmw fsub of |
| ; that product in the block guarded by "mbcnt result == 0". |
| ; NOTE(review): regenerate the assertions with utils/update_test_checks.py to confirm. |
| define amdgpu_kernel void @global_atomic_fsub_uni_value(ptr addrspace(1) %ptr) #0 { |
| ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_value( |
| ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]) |
| ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float |
| ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]] |
| ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-ITERATIVE-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]] |
| ; IR-ITERATIVE: 12: |
| ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4 |
| ; IR-ITERATIVE-NEXT: br label [[TMP14]] |
| ; IR-ITERATIVE: 14: |
| ; IR-ITERATIVE-NEXT: ret void |
| ; |
| ; IR-DPP-LABEL: @global_atomic_fsub_uni_value( |
| ; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-DPP-NEXT: [[TMP7:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]) |
| ; IR-DPP-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32 |
| ; IR-DPP-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float |
| ; IR-DPP-NEXT: [[TMP10:%.*]] = fmul float 4.000000e+00, [[TMP9]] |
| ; IR-DPP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-DPP-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP14:%.*]] |
| ; IR-DPP: 12: |
| ; IR-DPP-NEXT: [[TMP13:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP10]] seq_cst, align 4 |
| ; IR-DPP-NEXT: br label [[TMP14]] |
| ; IR-DPP: 14: |
| ; IR-DPP-NEXT: ret void |
| ; |
| ; Input: the atomic operand is the literal 4.0; %result has no users. |
| %result = atomicrmw fsub ptr addrspace(1) %ptr, float 4.0 seq_cst |
| ret void |
| } |
| |
| |
| ; fsub whose operand is derived from llvm.amdgcn.workitem.id.x (bitcast to float); the result is unused. |
| ; In both expected outputs the per-lane values are combined with fadd (starting from -0.0) |
| ; and only the final, single atomicrmw in the "mbcnt result == 0" block is an fsub. |
| ; Expected output, iterative strategy: a ComputeLoop using cttz + readlane over the ballot bits. |
| ; Expected output, dpp strategy: set.inactive with i32 -2147483648 (the bit pattern of -0.0), |
| ; a chain of update.dpp + fadd steps, readlane of lane 63, then strict.wwm. |
| define amdgpu_kernel void @global_atomic_fsub_div_value(ptr addrspace(1) %ptr) #0 { |
| ; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_value( |
| ; IR-ITERATIVE-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; IR-ITERATIVE-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float |
| ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-ITERATIVE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]] |
| ; IR-ITERATIVE: 8: |
| ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP16:%.*]] seq_cst, align 4 |
| ; IR-ITERATIVE-NEXT: br label [[TMP10:%.*]] |
| ; IR-ITERATIVE: 10: |
| ; IR-ITERATIVE-NEXT: ret void |
| ; IR-ITERATIVE: ComputeLoop: |
| ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP0:%.*]] ], [ [[TMP16]], [[COMPUTELOOP]] ] |
| ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP7]], [[TMP0]] ], [ [[TMP19:%.*]], [[COMPUTELOOP]] ] |
| ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) |
| ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = bitcast float [[DIVVALUE]] to i32 |
| ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP13]], i32 [[TMP12]]) |
| ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float |
| ; IR-ITERATIVE-NEXT: [[TMP16]] = fadd float [[ACCUMULATOR]], [[TMP15]] |
| ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = shl i64 1, [[TMP11]] |
| ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], -1 |
| ; IR-ITERATIVE-NEXT: [[TMP19]] = and i64 [[ACTIVEBITS]], [[TMP18]] |
| ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 0 |
| ; IR-ITERATIVE-NEXT: br i1 [[TMP20]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]] |
| ; IR-ITERATIVE: ComputeEnd: |
| ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-ITERATIVE-NEXT: br i1 [[TMP21]], label [[TMP8:%.*]], label [[TMP10]] |
| ; |
| ; IR-DPP-LABEL: @global_atomic_fsub_div_value( |
| ; IR-DPP-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() |
| ; IR-DPP-NEXT: [[DIVVALUE:%.*]] = bitcast i32 [[ID_X]] to float |
| ; IR-DPP-NEXT: [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) |
| ; IR-DPP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 |
| ; IR-DPP-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 32 |
| ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 |
| ; IR-DPP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP2]], i32 0) |
| ; IR-DPP-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP4]], i32 [[TMP5]]) |
| ; IR-DPP-NEXT: [[TMP7:%.*]] = bitcast float [[DIVVALUE]] to i32 |
| ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP7]], i32 -2147483648) |
| ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float |
| ; IR-DPP-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP7]] to float |
| ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP12:%.*]] = fadd float [[TMP9]], [[TMP11]] |
| ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP12]], i32 274, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP12]], [[TMP13]] |
| ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 276, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]] |
| ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 280, i32 15, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]] |
| ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 322, i32 10, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]] |
| ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 323, i32 12, i32 15, i1 false) |
| ; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]] |
| ; IR-DPP-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32 |
| ; IR-DPP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP23]], i32 63) |
| ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast i32 [[TMP24]] to float |
| ; IR-DPP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) |
| ; IR-DPP-NEXT: [[TMP27:%.*]] = icmp eq i32 [[TMP6]], 0 |
| ; IR-DPP-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP30:%.*]] |
| ; IR-DPP: 28: |
| ; IR-DPP-NEXT: [[TMP29:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP26]] seq_cst, align 4 |
| ; IR-DPP-NEXT: br label [[TMP30]] |
| ; IR-DPP: 30: |
| ; IR-DPP-NEXT: ret void |
| ; |
| ; Input: the atomic operand is the workitem id reinterpreted as a float; %result has no users. |
| %id.x = call i32 @llvm.amdgcn.workitem.id.x() |
| %divValue = bitcast i32 %id.x to float |
| %result = atomicrmw fsub ptr addrspace(1) %ptr, float %divValue seq_cst |
| ret void |
| } |
| |
| attributes #0 = {"target-cpu"="gfx906"} |