blob: 9cc9fa111745a998e54d490dbf4b2ab8e95fe0f3 [file] [log] [blame] [edit]
; Test driver: this module is compiled by llc for the amdgcn-amd-amdhsa triple
; with the gfx1201 CPU, and llc's output is piped to FileCheck, which requires
; the text "Occupancy: 16" to appear in it.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck %s
; CHECK: Occupancy: 16
; Named struct types used by the kernel below. The identifiers look
; machine-generated (presumably a reduced/renamed reproducer -- confirm).
; %struct.zot: single-pointer wrapper; value type of @global.
%struct.zot = type { ptr }
; %struct.bar: byref type of the kernel argument %arg of @eggs. The kernel
; reads it through raw byte-offset GEPs rather than through typed field GEPs.
%struct.bar = type { i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
; %struct.snork: seven 4-byte fields; element type of the addrspace(3) array
; that @eggs indexes by workitem id.
%struct.snork = type { i32, i32, float, float, i32, i32, i32 }
; %struct.barney: single-pointer element; used to index a pointer table based
; at addrspace(4) null in @eggs.
%struct.barney = type { ptr }
; %struct.zot.0 / %struct.ham: four floats; element type for the float
; stores addressed via %phi146 in @eggs.
%struct.zot.0 = type { %struct.ham }
%struct.ham = type { float, float, float, float }
; %struct.zot.1 / %struct.wobble: four i32s; element type for the indexed
; addrspace(1) accesses in the tail of @eggs.
%struct.zot.1 = type { %struct.wobble }
%struct.wobble = type { i32, i32, i32, i32 }
; External constants in addrspace(4). Each holds a pointer that @eggs loads in
; its entry block and then casts to another address space.
; @global: loaded as a ptr and cast to addrspace(4); a <4 x i32> is later
; loaded through it and passed as the first operand of a buffer load.
@global = external local_unnamed_addr addrspace(4) constant %struct.zot
; @global.1 .. @global.4: each loaded as a ptr and cast to addrspace(1).
@global.1 = external local_unnamed_addr addrspace(4) constant ptr
@global.2 = external local_unnamed_addr addrspace(4) constant ptr
@global.3 = external local_unnamed_addr addrspace(4) constant ptr
@global.4 = external local_unnamed_addr addrspace(4) constant ptr
; Intrinsic declarations. Attribute groups #0 and #1 are not defined in the
; visible part of this file.
; llvm.trap is called from block bb112 of @eggs.
declare void @llvm.trap() #0
; llvm.lifetime.end on an addrspace(5) pointer; called from bb398 of @eggs on
; the kernel's alloca.
declare void @llvm.lifetime.end.p5(ptr addrspace(5) captures(none)) #1
; Kernel @eggs -- the function whose compilation the test directives at the
; top of the file exercise.
;
; Parameter:
;   %arg - byref(%struct.bar) pointer in addrspace(4); read only through
;          byte-offset GEPs in the entry block.
;
; Control-flow overview:
;   bb            entry; hoists all argument/global loads and address math.
;   bb103         header of the outer loop (back edge comes from bb364).
;   bb112         calls llvm.trap, then unreachable.
;   bb113..bb127  ballot/popcount bookkeeping on the loop state %phi105.
;   bb128, bb141  per-iteration loads and merge of the loop state.
;   bb152/bb159   path ending in a self-loop with no exit edge (bb159).
;   bb166..bb204  buffer loads and float math feeding the dispatch in bb204.
;   bb238/bb263   path ending in a self-loop with no exit edge (bb263).
;   bb284..bb341  counted inner loop (bb296 <-> bb341).
;   bb351/bb353   downward scan over the private alloca.
;   bb361..bb397  stores/atomics, then back to bb103 through bb364.
;   bb398/bb350   the only path to `ret void`.
;
; Many operands are literal null pointers, zeroinitializer descriptors or
; small inttoptr constants; presumably these are leftovers of automatic test
; reduction -- confirm before relying on them. Attribute groups (#2, #7, #8)
; and metadata nodes (!0 .. !4) referenced here are not defined in the visible
; part of the file.
define amdgpu_kernel void @eggs(ptr addrspace(4) noundef readonly byref(%struct.bar) align 8 captures(none) %arg) local_unnamed_addr #2 {
bb:
; Private scratch array of 128 floats; written in bb128, scanned in bb353.
%alloca = alloca [128 x float], align 16, addrspace(5)
; Scalar i32 fields of %arg at byte offsets 0, 8, 12, 24 and 36.
%load = load i32, ptr addrspace(4) %arg, align 8, !amdgpu.noclobber !0
%getelementptr = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 8
%load1 = load i32, ptr addrspace(4) %getelementptr, align 8, !amdgpu.noclobber !0
%getelementptr2 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 12
%load3 = load i32, ptr addrspace(4) %getelementptr2, align 4, !amdgpu.noclobber !0
%getelementptr4 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 24
%load5 = load i32, ptr addrspace(4) %getelementptr4, align 8, !amdgpu.noclobber !0
%getelementptr6 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 36
%load7 = load i32, ptr addrspace(4) %getelementptr6, align 4, !amdgpu.noclobber !0
; Load from the null address of addrspace(4).
%load8 = load i32, ptr addrspace(4) null, align 4294967296
; Pointer fields of %arg (byte offsets 56 .. 296). Each is loaded as a generic
; ptr and immediately cast to addrspace(1); offset 136 holds an i32 instead.
%getelementptr9 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 56
%load10 = load ptr, ptr addrspace(4) %getelementptr9, align 8, !amdgpu.noclobber !0
%addrspacecast = addrspacecast ptr %load10 to ptr addrspace(1)
%getelementptr11 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 72
%load12 = load ptr, ptr addrspace(4) %getelementptr11, align 8, !amdgpu.noclobber !0
%addrspacecast13 = addrspacecast ptr %load12 to ptr addrspace(1)
%getelementptr14 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 80
%load15 = load ptr, ptr addrspace(4) %getelementptr14, align 8, !amdgpu.noclobber !0
%addrspacecast16 = addrspacecast ptr %load15 to ptr addrspace(1)
%getelementptr17 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 88
%load18 = load ptr, ptr addrspace(4) %getelementptr17, align 8, !amdgpu.noclobber !0
%addrspacecast19 = addrspacecast ptr %load18 to ptr addrspace(1)
%getelementptr20 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 96
%load21 = load ptr, ptr addrspace(4) %getelementptr20, align 8, !amdgpu.noclobber !0
%addrspacecast22 = addrspacecast ptr %load21 to ptr addrspace(1)
%getelementptr23 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 112
%load24 = load ptr, ptr addrspace(4) %getelementptr23, align 8, !amdgpu.noclobber !0
%addrspacecast25 = addrspacecast ptr %load24 to ptr addrspace(1)
%getelementptr26 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 120
%load27 = load ptr, ptr addrspace(4) %getelementptr26, align 8, !amdgpu.noclobber !0
%addrspacecast28 = addrspacecast ptr %load27 to ptr addrspace(1)
%getelementptr29 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 136
%load30 = load i32, ptr addrspace(4) %getelementptr29, align 8, !amdgpu.noclobber !0
%getelementptr31 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 144
%load32 = load ptr, ptr addrspace(4) %getelementptr31, align 8, !amdgpu.noclobber !0
%addrspacecast33 = addrspacecast ptr %load32 to ptr addrspace(1)
%getelementptr34 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 152
%load35 = load ptr, ptr addrspace(4) %getelementptr34, align 8, !amdgpu.noclobber !0
%addrspacecast36 = addrspacecast ptr %load35 to ptr addrspace(1)
%getelementptr37 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 176
%load38 = load ptr, ptr addrspace(4) %getelementptr37, align 8, !amdgpu.noclobber !0
%addrspacecast39 = addrspacecast ptr %load38 to ptr addrspace(1)
%getelementptr40 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 184
%load41 = load ptr, ptr addrspace(4) %getelementptr40, align 8, !amdgpu.noclobber !0
%addrspacecast42 = addrspacecast ptr %load41 to ptr addrspace(1)
%getelementptr43 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 192
%load44 = load ptr, ptr addrspace(4) %getelementptr43, align 8, !amdgpu.noclobber !0
%addrspacecast45 = addrspacecast ptr %load44 to ptr addrspace(1)
%getelementptr46 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 200
%load47 = load ptr, ptr addrspace(4) %getelementptr46, align 8, !amdgpu.noclobber !0
%addrspacecast48 = addrspacecast ptr %load47 to ptr addrspace(1)
%getelementptr49 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 208
%load50 = load ptr, ptr addrspace(4) %getelementptr49, align 8, !amdgpu.noclobber !0
%addrspacecast51 = addrspacecast ptr %load50 to ptr addrspace(1)
%getelementptr52 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 216
%load53 = load ptr, ptr addrspace(4) %getelementptr52, align 8, !amdgpu.noclobber !0
%addrspacecast54 = addrspacecast ptr %load53 to ptr addrspace(1)
%getelementptr55 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 224
%load56 = load ptr, ptr addrspace(4) %getelementptr55, align 8, !amdgpu.noclobber !0
%addrspacecast57 = addrspacecast ptr %load56 to ptr addrspace(1)
%getelementptr58 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 232
%load59 = load ptr, ptr addrspace(4) %getelementptr58, align 8, !amdgpu.noclobber !0
%addrspacecast60 = addrspacecast ptr %load59 to ptr addrspace(1)
%getelementptr61 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 280
%load62 = load ptr, ptr addrspace(4) %getelementptr61, align 8, !amdgpu.noclobber !0
%addrspacecast63 = addrspacecast ptr %load62 to ptr addrspace(1)
%getelementptr64 = getelementptr inbounds nuw i8, ptr addrspace(4) %arg, i64 296
%load65 = load ptr, ptr addrspace(4) %getelementptr64, align 8, !amdgpu.noclobber !0
%addrspacecast66 = addrspacecast ptr %load65 to ptr addrspace(1)
; Workitem id in x (range-annotated 0..1023). %icmp is true when the id is a
; multiple of 32; %lshr is id / 32 (presumably lane-0 test and wave index for
; 32-wide waves -- confirm).
%call = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x()
%and = and i32 %call, 31
%icmp = icmp eq i32 %and, 0
%lshr = lshr i32 %call, 5
; addrspace(3) addresses based at null: a %struct.snork slot per workitem,
; plus an i8 slot and an i32 slot indexed by %lshr. All are cast to generic
; pointers and accessed with volatile loads/stores later.
%getelementptr67 = getelementptr inbounds nuw %struct.snork, ptr addrspace(3) null, i32 %call
%addrspacecast68 = addrspacecast ptr addrspace(3) %getelementptr67 to ptr
%getelementptr69 = getelementptr inbounds nuw i8, ptr addrspace(3) null, i32 %lshr
%addrspacecast70 = addrspacecast ptr addrspace(3) %getelementptr69 to ptr
%getelementptr71 = getelementptr inbounds nuw i32, ptr addrspace(3) null, i32 %lshr
%addrspacecast72 = addrspacecast ptr addrspace(3) %getelementptr71 to ptr
%load73 = load ptr, ptr addrspace(4) @global, align 8
%addrspacecast74 = addrspacecast ptr %load73 to ptr addrspace(4)
%load75 = load ptr, ptr addrspace(4) @global.2, align 8
%addrspacecast76 = addrspacecast ptr %load75 to ptr addrspace(1)
%icmp77 = icmp ne i32 %load5, -1
%add = add i32 %load8, -1
%uitofp = uitofp i32 %add to float
; Generic pointers to the individual fields (byte offsets 20, 4, 16, 12, 8,
; 24) of this workitem's %struct.snork slot.
%getelementptr78 = getelementptr inbounds nuw i8, ptr addrspace(3) %getelementptr67, i32 20
%addrspacecast79 = addrspacecast ptr addrspace(3) %getelementptr78 to ptr
%getelementptr80 = getelementptr inbounds nuw i8, ptr addrspace(3) %getelementptr67, i32 4
%addrspacecast81 = addrspacecast ptr addrspace(3) %getelementptr80 to ptr
%getelementptr82 = getelementptr inbounds nuw i8, ptr addrspace(3) %getelementptr67, i32 16
%addrspacecast83 = addrspacecast ptr addrspace(3) %getelementptr82 to ptr
%getelementptr84 = getelementptr inbounds nuw i8, ptr addrspace(3) %getelementptr67, i32 12
%addrspacecast85 = addrspacecast ptr addrspace(3) %getelementptr84 to ptr
%getelementptr86 = getelementptr inbounds nuw i8, ptr addrspace(3) %getelementptr67, i32 8
%addrspacecast87 = addrspacecast ptr addrspace(3) %getelementptr86 to ptr
%getelementptr88 = getelementptr inbounds nuw i8, ptr addrspace(3) %getelementptr67, i32 24
%addrspacecast89 = addrspacecast ptr addrspace(3) %getelementptr88 to ptr
%load90 = load ptr, ptr addrspace(4) null, align 4294967296
%addrspacecast91 = addrspacecast ptr %load90 to ptr addrspace(4)
%load92 = load ptr, ptr addrspace(4) @global.4, align 8
%addrspacecast93 = addrspacecast ptr %load92 to ptr addrspace(1)
%load94 = load ptr, ptr addrspace(4) @global.3, align 8
%addrspacecast95 = addrspacecast ptr %load94 to ptr addrspace(1)
%load96 = load ptr, ptr addrspace(4) @global.1, align 8
%addrspacecast97 = addrspacecast ptr %load96 to ptr addrspace(1)
; Loop-invariant values consumed by later blocks.
%icmp98 = icmp eq ptr addrspace(1) %addrspacecast63, addrspacecast (ptr null to ptr addrspace(1))
%sext = sext i32 %load to i64
%icmp99 = icmp ne i32 %add, 0
%zext = zext i1 %icmp99 to i32
%add100 = add i32 %load7, %zext
%getelementptr101 = getelementptr inbounds nuw i8, ptr addrspace(1) %addrspacecast33, i64 4294967295
%getelementptr102 = getelementptr inbounds nuw i8, ptr addrspace(1) %addrspacecast63, i64 8
br label %bb103
; Outer loop header. Loop-carried state: %phi, %phi104 and %phi105; all three
; start at -1 / 0.0 / -1 and are updated from bb364.
bb103: ; preds = %bb364, %bb
%phi = phi i32 [ -1, %bb ], [ %phi143, %bb364 ]
%phi104 = phi nsz float [ 0.0, %bb ], [ %phi144, %bb364 ]
%phi105 = phi i32 [ -1, %bb ], [ %phi365, %bb364 ]
%call106 = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
%icmp107 = icmp slt i32 %phi105, 0
; Empty side-effecting inline asm that routes the constant 0 through a
; "v"-constrained register tied to its output, so the optimizer cannot see
; the value. If any lane's result is non-zero the kernel traps in bb112.
%call108 = tail call i32 asm sideeffect "", "=v,0"(i32 range(i32 0, 2) 0) #7
%icmp109 = icmp ne i32 %call108, 0
%call110 = tail call i32 @llvm.amdgcn.ballot.i32(i1 %icmp109)
%icmp111 = icmp eq i32 %call110, 0
br i1 %icmp111, label %bb113, label %bb112
bb112: ; preds = %bb103
tail call void @llvm.trap()
unreachable
; Count the lanes whose %phi105 is negative (ballot masked with the
; ballot(true) mask, then popcount) and take bb118 when more than 3 are.
bb113: ; preds = %bb103
%call114 = tail call i32 @llvm.amdgcn.ballot.i32(i1 %icmp107)
%and115 = and i32 %call114, %call106
%call116 = tail call noundef range(i32 0, 33) i32 @llvm.ctpop.i32(i32 %and115)
%icmp117 = icmp samesign ugt i32 %call116, 3
br i1 %icmp117, label %bb118, label %bb141
; When all 32 mask bits are set, bb120 reads the volatile i8 flag in the
; %lshr-indexed slot; a set flag leaves the loop through bb398.
bb118: ; preds = %bb113
%icmp119 = icmp eq i32 %and115, -1
br i1 %icmp119, label %bb120, label %bb122
bb120: ; preds = %bb118
%load121 = load volatile i8, ptr %addrspacecast70, align 1, !noalias.addrspace !1
%trunc = trunc nuw i8 %load121 to i1
br i1 %trunc, label %bb398, label %bb122
; Only workitems with (id & 31) == 0 run bb123.
bb122: ; preds = %bb120, %bb118
br i1 %icmp, label %bb123, label %bb127
; Atomically add the popcount to the counter at %addrspacecast, then read the
; volatile i32 slot; if that value is not below %load1, bb126 sets the i8
; flag that bb120 tests.
bb123: ; preds = %bb122
%atomicrmw = atomicrmw add ptr addrspace(1) %addrspacecast, i32 %call116 syncscope("agent") monotonic, align 4
%load124 = load volatile i32, ptr %addrspacecast72, align 4, !noalias.addrspace !1
%icmp125 = icmp ult i32 %load124, %load1
br i1 %icmp125, label %bb127, label %bb126
bb126: ; preds = %bb123
store volatile i8 1, ptr %addrspacecast70, align 1, !noalias.addrspace !1
br label %bb127
bb127: ; preds = %bb126, %bb123, %bb122
br i1 %icmp107, label %bb128, label %bb141
; Executed only by lanes with negative %phi105: loads new per-iteration
; inputs and writes element 0 of the private alloca.
bb128: ; preds = %bb127
%load129 = load i32, ptr addrspace(1) %addrspacecast16, align 4
%urem = urem i32 %load129, %load3
%load130 = load i32, ptr addrspace(1) %addrspacecast39, align 4
%urem131 = urem i32 %load130, %load3
%zext132 = zext i32 %urem131 to i64
%getelementptr133 = getelementptr inbounds nuw i32, ptr addrspace(1) %addrspacecast42, i64 %zext132
%load134 = load i32, ptr addrspace(1) %getelementptr133, align 4
%load135 = load <4 x i32>, ptr addrspace(4) %addrspacecast74, align 16
%call136 = tail call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %load135, i32 %load134, i32 0, i32 0, i32 0)
%bitcast = bitcast float %call136 to i32
; %select keeps %load134 only when bit 16 of the loaded word is set,
; otherwise it becomes -1.
%and137 = and i32 %bitcast, 65536
%icmp138 = icmp eq i32 %and137, 0
%select = select i1 %icmp138, i32 -1, i32 %load134
%load139 = load float, ptr addrspace(1) %addrspacecast76, align 4
store i32 -1, ptr addrspace(1) null, align 4294967296
store float 0x3FF0000100000000, ptr addrspace(1) %addrspacecast45, align 4
store float 0.000000e+00, ptr addrspace(5) %alloca, align 16
%zext140 = zext i32 %urem to i64
br label %bb141
; Merge point: values from bb128 for refreshed lanes, carried or zero values
; for the others.
bb141: ; preds = %bb128, %bb127, %bb113
%phi142 = phi i32 [ %load30, %bb128 ], [ 0, %bb127 ], [ 0, %bb113 ]
%phi143 = phi i32 [ %select, %bb128 ], [ %phi, %bb127 ], [ %phi, %bb113 ]
%phi144 = phi nsz float [ %load139, %bb128 ], [ %phi104, %bb127 ], [ %phi104, %bb113 ]
%phi145 = phi i32 [ 1, %bb128 ], [ %phi105, %bb127 ], [ %phi105, %bb113 ]
%phi146 = phi i64 [ %sext, %bb128 ], [ 0, %bb127 ], [ 0, %bb113 ]
%phi147 = phi i64 [ %zext140, %bb128 ], [ 0, %bb127 ], [ 0, %bb113 ]
%phi148 = phi i32 [ %load129, %bb128 ], [ 0, %bb127 ], [ 0, %bb113 ]
; Take bb152 when %phi145 > 0 and %phi142 < 2^30 (unsigned).
%icmp149 = icmp sgt i32 %phi145, 0
%icmp150 = icmp ult i32 %phi142, 1073741824
%select151 = select i1 %icmp149, i1 %icmp150, i1 false
br i1 %select151, label %bb152, label %bb166
; Buffer load indexed by the low 27 bits of %phi142; element 2 shifted right
; by 8 indexes a byte store of 2. Falls into bb159.
bb152: ; preds = %bb141
%and153 = and i32 %phi142, 134217727
%call154 = tail call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> zeroinitializer, i32 %and153, i32 0, i32 0, i32 0)
%bitcast155 = bitcast <3 x float> %call154 to <3 x i32>
%extractelement = extractelement <3 x i32> %bitcast155, i64 2
%lshr156 = lshr i32 %extractelement, 8
%zext157 = zext nneg i32 %lshr156 to i64
%getelementptr158 = getelementptr inbounds nuw i8, ptr addrspace(1) %addrspacecast33, i64 %zext157
store i8 2, ptr addrspace(1) %getelementptr158, align 1
br label %bb159
; Self-loop with an unconditional back edge: there is no exit from bb159.
bb159: ; preds = %bb159, %bb152
%call160 = tail call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
%bitcast161 = bitcast <3 x float> %call160 to <3 x i32>
%extractelement162 = extractelement <3 x i32> %bitcast161, i64 2
%lshr163 = lshr i32 %extractelement162, 8
%zext164 = zext nneg i32 %lshr163 to i64
%getelementptr165 = getelementptr inbounds nuw i8, ptr addrspace(1) %addrspacecast33, i64 %zext164
store i8 2, ptr addrspace(1) %getelementptr165, align 1
br label %bb159, !llvm.loop !2
; Loads three floats and uses the low 29 bits of %phi142 as a buffer index;
; the loaded word indexes an i32 table at %addrspacecast36.
bb166: ; preds = %bb141
%load167 = load float, ptr addrspace(1) %addrspacecast25, align 16
%load168 = load float, ptr addrspace(1) inttoptr (i64 4 to ptr addrspace(1)), align 4
%load169 = load float, ptr addrspace(1) inttoptr (i64 8 to ptr addrspace(1)), align 8
%and170 = and i32 %phi142, 536870911
%load171 = load <4 x i32>, ptr addrspace(4) null, align 4294967296
%call172 = tail call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %load171, i32 %and170, i32 0, i32 0, i32 0)
%bitcast173 = bitcast float %call172 to i32
%zext174 = zext i32 %bitcast173 to i64
%getelementptr175 = getelementptr inbounds nuw i32, ptr addrspace(1) %addrspacecast36, i64 %zext174
%load176 = load i32, ptr addrspace(1) %getelementptr175, align 4
%icmp177 = icmp ne i32 %and170, %load5
%and178 = and i1 %icmp77, %icmp177
%icmp179 = icmp eq i32 %phi143, %and170
br i1 %icmp179, label %bb180, label %bb181
; Empty block; exists only to give the phis in bb181 a distinct incoming edge.
bb180: ; preds = %bb166
br label %bb181
bb181: ; preds = %bb180, %bb166
%phi182 = phi i32 [ 0, %bb180 ], [ %load176, %bb166 ]
%phi183 = phi i1 [ true, %bb180 ], [ %and178, %bb166 ]
%phi184 = phi i32 [ -1, %bb180 ], [ %and170, %bb166 ]
%phi185 = phi i32 [ 0, %bb180 ], [ %phi145, %bb166 ]
br i1 %phi183, label %bb204, label %bb186
; Float computation producing %fdiv, which becomes %phi207 in bb204.
bb186: ; preds = %bb181
%fmul = fmul reassoc nnan ninf nsz arcp contract float %phi144, %uitofp
%sub = sub nuw i32 %phi184, %load7
%mul = mul i32 %sub, %load8
%add187 = add i32 %add100, %mul
%mul188 = mul i32 %add187, 3
%call189 = tail call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> zeroinitializer, i32 %mul188, i32 0, i32 0, i32 0)
%extractelement190 = extractelement <2 x float> %call189, i64 1
%call191 = tail call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
%extractelement192 = extractelement <3 x float> %call191, i64 2
%extractelement193 = extractelement <3 x float> %call191, i64 0
%extractelement194 = extractelement <3 x float> %call191, i64 1
%fmul195 = fmul reassoc nnan ninf nsz arcp contract float %extractelement192, %fmul
%fmul196 = fmul reassoc nnan ninf nsz arcp contract float %fmul, %fmul
%fmul197 = fmul reassoc nnan ninf nsz arcp contract float %fmul196, %extractelement190
%fmul198 = fmul reassoc nnan ninf nsz arcp contract float %fmul197, %extractelement193
%fmul199 = fmul reassoc nnan ninf nsz arcp contract float %fmul198, %fmul195
%fneg = fneg reassoc nnan ninf nsz arcp contract float %fmul
%fmul200 = fmul reassoc nnan ninf nsz arcp contract float %extractelement194, %fneg
%load201 = load float, ptr addrspace(1) %addrspacecast28, align 16
%fmul202 = fmul reassoc nnan ninf nsz arcp contract float %fmul200, %load201
%fmul203 = fmul reassoc nnan ninf nsz arcp contract float %fmul202, %fmul195
%fdiv = fdiv reassoc nnan ninf nsz arcp contract float %fmul203, %fmul199
br label %bb204
; Decodes %phi182: bits 27..29 select an entry of a pointer table based at
; addrspace(4) null, the low 27 bits (times 16) are a byte offset into the
; selected buffer. Bit 30 of the word at offset 12 (%load223) selects bb236;
; otherwise bb277 tests its sign bit.
bb204: ; preds = %bb186, %bb181
%phi205 = phi float [ %load169, %bb181 ], [ 0.000000e+00, %bb186 ]
%phi206 = phi float [ %load168, %bb181 ], [ 0.000000e+00, %bb186 ]
%phi207 = phi float [ 0.000000e+00, %bb181 ], [ %fdiv, %bb186 ]
%phi208 = phi float [ %load167, %bb181 ], [ 0.000000e+00, %bb186 ]
%lshr209 = lshr i32 %phi182, 27
%and210 = and i32 %lshr209, 7
%and211 = and i32 %phi182, 134217727
%zext212 = zext nneg i32 %and210 to i64
%getelementptr213 = getelementptr inbounds nuw %struct.barney, ptr addrspace(4) null, i64 %zext212
%load214 = load i64, ptr addrspace(4) %getelementptr213, align 8
%inttoptr = inttoptr i64 %load214 to ptr
%addrspacecast215 = addrspacecast ptr %inttoptr to ptr addrspace(1)
%shl = shl nuw nsw i32 %and211, 4
%zext216 = zext nneg i32 %shl to i64
%getelementptr217 = getelementptr inbounds nuw i8, ptr addrspace(1) %addrspacecast215, i64 %zext216
%getelementptr218 = getelementptr inbounds nuw i8, ptr addrspace(1) %getelementptr217, i64 4
%load219 = load i16, ptr addrspace(1) %getelementptr218, align 4
%load220 = load i32, ptr addrspace(1) null, align 4294967296
%lshr221 = lshr i32 %load220, 12
%getelementptr222 = getelementptr inbounds nuw i8, ptr addrspace(1) %getelementptr217, i64 12
%load223 = load i32, ptr addrspace(1) %getelementptr222, align 4
%lshr224 = lshr i32 %load223, 25
%and225 = and i32 %lshr224, 31
%add226 = add nsw i32 %and225, -1
%uitofp227 = uitofp i32 %add226 to float
%call228 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) %phi144, float 0x3FEFFFEB00000000)
%fmul229 = fmul reassoc nnan ninf nsz arcp contract float %call228, %uitofp227
%fptoui = fptoui float %fmul229 to i32
%and230 = and i32 %load220, 65535
%and231 = and i16 %load219, 32767
%zext232 = zext nneg i16 %and231 to i32
%add233 = add nuw nsw i32 %and230, %zext232
%and234 = and i32 %load223, 1073741824
%icmp235 = icmp eq i32 %and234, 0
br i1 %icmp235, label %bb277, label %bb236
; Bit 30 set: a zero 15-bit count (%and231) goes straight to bb351.
bb236: ; preds = %bb204
%icmp237 = icmp eq i16 %and231, 0
br i1 %icmp237, label %bb351, label %bb238
; Preamble of the bb263 loop: one buffer load plus volatile stores into this
; workitem's slot and a float store at %getelementptr248.
bb238: ; preds = %bb236
%getelementptr239 = getelementptr inbounds nuw i8, ptr addrspace(1) %getelementptr217, i64 16
%sub240 = sub nsw i32 %and211, %lshr221
%add241 = add i32 %sub240, %fptoui
%addrspacecast242 = addrspacecast ptr %inttoptr to ptr addrspace(4)
%load243 = load <4 x i32>, ptr addrspace(4) %addrspacecast242, align 16
%fmul244 = fmul reassoc nnan ninf nsz arcp contract float %phi205, %phi205
%call245 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %phi206, float nofpclass(nan inf) %phi206, float nofpclass(nan inf) %fmul244)
%call246 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %phi207, float nofpclass(nan inf) %phi207, float nofpclass(nan inf) %call245)
%call247 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) %call246)
%getelementptr248 = getelementptr inbounds %struct.zot.0, ptr addrspace(1) %addrspacecast51, i64 %phi146
%call249 = tail call float @llvm.amdgcn.rsq.f32(float 0.000000e+00)
%load250 = load i32, ptr addrspace(1) %getelementptr239, align 4
%mul251 = mul i32 %load250, %and225
%add252 = add i32 %add241, %mul251
%call253 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %load243, i32 %add252, i32 0, i32 0, i32 0)
%extractelement254 = extractelement <4 x float> %call253, i64 3
%fmul255 = fmul reassoc nnan ninf nsz arcp contract float %extractelement254, %extractelement254
%call256 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) %fmul255)
store volatile i32 %phi184, ptr %addrspacecast79, align 4, !noalias.addrspace !1
store volatile i32 %phi182, ptr %addrspacecast68, align 4, !noalias.addrspace !1
store volatile i32 %and230, ptr %addrspacecast81, align 4, !noalias.addrspace !1
%fmul257 = fmul reassoc nnan ninf nsz arcp contract float %phi207, %call256
%fdiv258 = fdiv reassoc nnan ninf nsz arcp contract float %fmul257, %call247
%fmul259 = fmul reassoc nnan ninf nsz arcp contract float %fdiv258, %fdiv258
%fcmp = fcmp reassoc nnan ninf nsz arcp contract ogt float %fmul259, 0.000000e+00
%select260 = select reassoc nnan ninf nsz arcp contract i1 %fcmp, float %call249, float 0.000000e+00
%fmul261 = fmul reassoc nnan ninf nsz arcp contract float %select260, %fdiv258
store float %fmul261, ptr addrspace(1) %getelementptr248, align 16
%fdiv262 = fdiv reassoc nnan ninf nsz arcp contract float 1.000000e+00, %call247
br label %bb263
; Self-loop with an unconditional back edge: there is no exit from bb263.
bb263: ; preds = %bb263, %bb238
%load264 = load i32, ptr addrspace(1) null, align 4294967296
%mul265 = mul i32 %load264, %and225
%add266 = add i32 %add241, %mul265
%call267 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %load243, i32 %add266, i32 0, i32 0, i32 0)
%extractelement268 = extractelement <4 x float> %call267, i64 3
%fmul269 = fmul reassoc nnan ninf nsz arcp contract float %extractelement268, %extractelement268
%call270 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) %fmul269)
store volatile i32 %phi184, ptr %addrspacecast79, align 4, !noalias.addrspace !1
store volatile i32 %phi182, ptr %addrspacecast68, align 4, !noalias.addrspace !1
store volatile i32 0, ptr %addrspacecast81, align 4, !noalias.addrspace !1
%fmul271 = fmul reassoc nnan ninf nsz arcp contract float %phi207, %call270
%fmul272 = fmul reassoc nnan ninf nsz arcp contract float %fmul271, %fdiv262
%fmul273 = fmul reassoc nnan ninf nsz arcp contract float %fmul272, %fmul272
%fcmp274 = fcmp reassoc nnan ninf nsz arcp contract ogt float %fmul273, 0.000000e+00
%select275 = select reassoc nnan ninf nsz arcp contract i1 %fcmp274, float %call249, float 0.000000e+00
%fmul276 = fmul reassoc nnan ninf nsz arcp contract float %select275, %fmul272
store float %fmul276, ptr addrspace(1) %getelementptr248, align 16
br label %bb263, !llvm.loop !4
; Bit 30 clear: dispatch on the sign bit of %load223.
bb277: ; preds = %bb204
%icmp278 = icmp slt i32 %load223, 0
br i1 %icmp278, label %bb279, label %bb348
bb279: ; preds = %bb277
%fmul280 = fmul reassoc nnan ninf nsz arcp contract float %phi207, %phi207
%call281 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) %fmul280)
%fdiv282 = fdiv reassoc nnan ninf nsz arcp contract float 1.000000e+00, %call281
%icmp283 = icmp eq i16 %and231, 0
br i1 %icmp283, label %bb351, label %bb284
; Preheader of the counted loop bb296/bb341: computes the four buffer
; indices (%add291, %add293, %add294, %add295) used by every iteration.
bb284: ; preds = %bb279
%sub285 = sub nsw i32 %and211, %lshr221
%addrspacecast286 = addrspacecast ptr %inttoptr to ptr addrspace(4)
%fmul287 = fmul reassoc nnan ninf nsz arcp contract float %fdiv282, %phi207
%fmul288 = fmul reassoc nnan ninf nsz arcp contract float %fdiv282, %phi206
%fmul289 = fmul reassoc nnan ninf nsz arcp contract float %fdiv282, %phi205
%getelementptr290 = getelementptr inbounds %struct.zot.0, ptr addrspace(1) %addrspacecast51, i64 %phi146
%add291 = add i32 %sub285, %fptoui
%load292 = load <4 x i32>, ptr addrspace(4) %addrspacecast286, align 16
%add293 = add i32 %add291, 1
%add294 = add i32 %add291, %and225
%add295 = add i32 %add294, 1
br label %bb296
; Inner loop: %phi297 counts from %and230 up to (exclusive) %add233. Each
; iteration performs four v4f32 buffer loads with loop-invariant indices and
; blends pairs of them with fma using %fmul229.
bb296: ; preds = %bb341, %bb284
%phi297 = phi i32 [ %and230, %bb284 ], [ %add346, %bb341 ]
%call298 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %load292, i32 %add291, i32 0, i32 0, i32 0)
%extractelement299 = extractelement <4 x float> %call298, i64 0
%extractelement300 = extractelement <4 x float> %call298, i64 3
%call301 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %load292, i32 %add293, i32 0, i32 0, i32 0)
%extractelement302 = extractelement <4 x float> %call301, i64 3
%fsub = fsub reassoc nnan ninf nsz arcp contract float %extractelement302, %extractelement300
%call303 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %fsub, float nofpclass(nan inf) %fmul229, float nofpclass(nan inf) %extractelement300)
%call304 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %load292, i32 %add294, i32 0, i32 0, i32 0)
%extractelement305 = extractelement <4 x float> %call304, i64 3
%call306 = tail call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %load292, i32 %add295, i32 0, i32 0, i32 0)
%extractelement307 = extractelement <4 x float> %call306, i64 3
%fsub308 = fsub reassoc nnan ninf nsz arcp contract float %extractelement307, %extractelement305
%call309 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %fsub308, float nofpclass(nan inf) %fmul229, float nofpclass(nan inf) %extractelement305)
%fneg310 = fneg reassoc nnan ninf nsz arcp contract float %extractelement299
%fmul311 = fmul reassoc nnan ninf nsz arcp contract float %extractelement299, %extractelement299
%call312 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.sqrt.f32(float nofpclass(nan inf) %fmul311)
%fdiv313 = fdiv reassoc nnan ninf nsz arcp contract float 1.000000e+00, %call312
%fmul314 = fmul reassoc nnan ninf nsz arcp contract float %fdiv313, %fneg310
%fmul315 = fmul reassoc nnan ninf nsz arcp contract float %fmul314, %fmul287
%fmul316 = fmul reassoc nnan ninf nsz arcp contract float %fmul315, %fmul315
%fsub317 = fsub reassoc nnan ninf nsz arcp contract float 1.000000e+00, %fmul316
; Skip bb319 when 1 - %fmul315^2 is exactly zero; bb319 divides by it.
%fcmp318 = fcmp reassoc nnan ninf nsz arcp contract oeq float %fsub317, 0.000000e+00
br i1 %fcmp318, label %bb341, label %bb319
bb319: ; preds = %bb296
%extractelement320 = extractelement <4 x float> %call301, i64 2
%extractelement321 = extractelement <4 x float> %call298, i64 2
%fsub322 = fsub reassoc nnan ninf nsz arcp contract float %extractelement320, %extractelement321
%call323 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %fsub322, float nofpclass(nan inf) %fmul229, float nofpclass(nan inf) %extractelement321)
%extractelement324 = extractelement <4 x float> %call301, i64 1
%extractelement325 = extractelement <4 x float> %call298, i64 1
%fsub326 = fsub reassoc nnan ninf nsz arcp contract float %extractelement324, %extractelement325
%call327 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %fsub326, float nofpclass(nan inf) %fmul229, float nofpclass(nan inf) %extractelement325)
%fsub328 = fsub reassoc nnan ninf nsz arcp contract float %extractelement299, %phi208
%fmul329 = fmul reassoc nnan ninf nsz arcp contract float %fmul314, %fsub328
%fmul330 = fmul reassoc nnan ninf nsz arcp contract float %fmul289, %call323
%call331 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %call327, float nofpclass(nan inf) %fmul288, float nofpclass(nan inf) %fmul330)
%call332 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.fma.f32(float nofpclass(nan inf) %fsub328, float nofpclass(nan inf) %fmul287, float nofpclass(nan inf) %call331)
%fmul333 = fmul reassoc nnan ninf nsz arcp contract float %fmul329, %fmul315
%fsub334 = fsub reassoc nnan ninf nsz arcp contract float %call332, %fmul333
%fmul335 = fmul reassoc nnan ninf nsz arcp contract float %fsub334, %fdiv282
%fdiv336 = fdiv reassoc nnan ninf nsz arcp contract float %fmul335, %fsub317
%call337 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.minnum.f32(float nofpclass(nan inf) %call312, float nofpclass(nan inf) 0.000000e+00)
%call338 = tail call reassoc nnan ninf nsz arcp contract noundef float @llvm.maxnum.f32(float nofpclass(nan inf) %call337, float 0.000000e+00)
%fmul339 = fmul reassoc nnan ninf nsz arcp contract float %call338, %fdiv313
%call340 = tail call reassoc nnan ninf nsz arcp contract float @llvm.fabs.f32(float nofpclass(nan inf) %fdiv336)
br label %bb341
; Inner-loop latch: stores the blended value, bumps the counter and exits to
; bb351 once it reaches %add233.
bb341: ; preds = %bb319, %bb296
%phi342 = phi nsz float [ %fmul339, %bb319 ], [ 0.000000e+00, %bb296 ]
%phi343 = phi float [ %call340, %bb319 ], [ 0.000000e+00, %bb296 ]
%fsub344 = fsub reassoc nnan ninf nsz arcp contract float %call309, %call303
%fmul345 = fmul reassoc nnan ninf nsz arcp contract float %phi342, %fsub344
%fadd = fadd reassoc nnan ninf nsz arcp contract float %fmul345, %call303
store volatile i32 %load220, ptr %addrspacecast83, align 4, !noalias.addrspace !1
store float %fadd, ptr addrspace(1) %getelementptr290, align 16
store float 1.000000e+00, ptr addrspace(1) null, align 4294967296
%add346 = add nuw nsw i32 %phi297, 1
%icmp347 = icmp samesign ult i32 %add346, %add233
br i1 %icmp347, label %bb296, label %bb351
; Neither bit 30 nor the sign bit set: a non-zero count returns via bb350.
bb348: ; preds = %bb277
%icmp349 = icmp eq i16 %and231, 0
br i1 %icmp349, label %bb351, label %bb350
; The only return of the kernel.
bb350: ; preds = %bb398, %bb348
ret void
bb351: ; preds = %bb348, %bb341, %bb279, %bb236
%phi352 = phi float [ 0.000000e+00, %bb279 ], [ 0.000000e+00, %bb348 ], [ 0.000000e+00, %bb236 ], [ %phi343, %bb341 ]
br label %bb353
; Walks the private alloca downward from index %phi185 - 1, continuing while
; the pre-decrement index is > 1 and the loaded element is greater than
; %phi352.
bb353: ; preds = %bb353, %bb351
%phi354 = phi i32 [ %phi185, %bb351 ], [ %add355, %bb353 ]
%add355 = add nsw i32 %phi354, -1
%getelementptr356 = getelementptr inbounds float, ptr addrspace(5) %alloca, i32 %add355
%load357 = load float, ptr addrspace(5) %getelementptr356, align 4
%icmp358 = icmp sgt i32 %phi354, 1
%fcmp359 = fcmp reassoc nnan ninf nsz arcp contract ogt float %load357, %phi352
%select360 = select i1 %icmp358, i1 %fcmp359, i1 false
br i1 %select360, label %bb353, label %bb361
; Publishes results (byte store, atomic increment, i32 store, volatile store
; of 2^29 - 1 into the slot) and goes to bb366 only when the scan stopped at
; index 0; otherwise straight back to the outer loop.
bb361: ; preds = %bb353
%icmp362 = icmp eq i32 %add355, 0
store i8 1, ptr addrspace(1) %getelementptr101, align 1
%atomicrmw363 = atomicrmw add ptr addrspace(1) %addrspacecast13, i32 1 syncscope("agent") monotonic, align 4
store i32 %phi148, ptr addrspace(1) %addrspacecast19, align 4
store volatile i32 536870911, ptr %addrspacecast89, align 4, !noalias.addrspace !1
br i1 %icmp362, label %bb366, label %bb364
; Outer-loop latch: the next %phi105 is the scan index from bb361, or -1 when
; arriving from any of the bb366 successors.
bb364: ; preds = %bb397, %bb389, %bb386, %bb385, %bb361
%phi365 = phi i32 [ %add355, %bb361 ], [ -1, %bb385 ], [ -1, %bb386 ], [ -1, %bb389 ], [ -1, %bb397 ]
br label %bb103
; Reloads this workitem's slot with volatile loads, performs two chained
; buffer loads, writes several outputs and then dispatches on bit 30 and the
; sign bit of %load380.
bb366: ; preds = %bb361
%load367 = load volatile i32, ptr %addrspacecast79, align 4, !noalias.addrspace !1
%load368 = load volatile i32, ptr %addrspacecast68, align 4, !noalias.addrspace !1
%load369 = load volatile i32, ptr %addrspacecast81, align 4, !noalias.addrspace !1
%load370 = load volatile float, ptr %addrspacecast87, align 4, !noalias.addrspace !1
%load371 = load volatile float, ptr %addrspacecast85, align 4, !noalias.addrspace !1
%call372 = tail call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %load171, i32 %load367, i32 0, i32 0, i32 0)
%bitcast373 = bitcast float %call372 to i32
%load374 = load <4 x i32>, ptr addrspace(4) %addrspacecast91, align 16
%call375 = tail call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %load374, i32 %bitcast373, i32 0, i32 0, i32 0)
%getelementptr376 = getelementptr inbounds nuw %struct.zot.1, ptr addrspace(1) %addrspacecast93, i64 %phi147
%load377 = load i32, ptr addrspace(1) %addrspacecast22, align 4
%and378 = and i32 %load377, -285212672
%or = or disjoint i32 %and378, 268435456
store i32 0, ptr addrspace(1) %addrspacecast95, align 4
store i32 %or, ptr addrspace(1) %addrspacecast97, align 4
%getelementptr379 = getelementptr inbounds nuw i8, ptr addrspace(1) %getelementptr376, i64 8
store float %call375, ptr addrspace(1) %getelementptr379, align 8
%load380 = load i32, ptr addrspace(1) null, align 4294967296
%load381 = load i16, ptr addrspace(1) inttoptr (i64 6 to ptr addrspace(1)), align 2
%zext382 = zext i16 %load381 to i32
%and383 = and i32 %load380, 1073741824
%icmp384 = icmp eq i32 %and383, 0
br i1 %icmp384, label %bb387, label %bb385
; Bit 30 set: clear one word, and when the pointer loaded from %arg offset
; 280 is non-null also store FLT_MAX-valued float at its offset 8.
bb385: ; preds = %bb366
store i32 0, ptr addrspace(1) %addrspacecast60, align 4
br i1 %icmp98, label %bb364, label %bb386
bb386: ; preds = %bb385
store float 0x47EFFFFFE0000000, ptr addrspace(1) %getelementptr102, align 8
br label %bb364
bb387: ; preds = %bb366
%icmp388 = icmp slt i32 %load380, 0
br i1 %icmp388, label %bb389, label %bb397
; Sign bit set: indexed i32 load, low 30 bits kept and the top bit forced on
; before the value is stored.
bb389: ; preds = %bb387
%sub390 = sub i32 %load369, %zext382
%zext391 = zext i32 %sub390 to i64
%getelementptr392 = getelementptr inbounds nuw %struct.zot.1, ptr addrspace(1) null, i64 %zext391
%getelementptr393 = getelementptr inbounds nuw i8, ptr addrspace(1) %getelementptr392, i64 4
%load394 = load i32, ptr addrspace(1) %getelementptr393, align 4
store i32 0, ptr addrspace(1) %addrspacecast66, align 4
%and395 = and i32 %load394, 1073741823
%or396 = or disjoint i32 %and395, -2147483648
store i32 %or396, ptr addrspace(1) %addrspacecast48, align 4
br label %bb364
; Neither bit set: zero three float locations.
bb397: ; preds = %bb387
store float 0.000000e+00, ptr addrspace(1) inttoptr (i64 12 to ptr addrspace(1)), align 4
store float 0.000000e+00, ptr addrspace(1) %addrspacecast54, align 16
store float 0.000000e+00, ptr addrspace(1) %addrspacecast57, align 4
br label %bb364
; Exit path taken from bb120 once the i8 flag is set: ends the lifetime of
; the private alloca and returns through bb350.
bb398: ; preds = %bb120
call void @llvm.lifetime.end.p5(ptr addrspace(5) %alloca) #8
br label %bb350
}
; Declarations of the LLVM intrinsics needed by this module. The trailing
; #N on each line selects one of the attribute groups defined after these
; declarations.
;
; Target-independent float/integer intrinsics (attribute group #3).
declare float @llvm.minnum.f32(float, float) #3
declare float @llvm.maxnum.f32(float, float) #3
declare float @llvm.fma.f32(float, float, float) #3
declare float @llvm.fabs.f32(float) #3
declare float @llvm.sqrt.f32(float) #3
declare i32 @llvm.ctpop.i32(i32) #3
; AMDGPU target intrinsics.
declare float @llvm.amdgcn.rsq.f32(float) #4
; The return value is declared noundef and limited to the range [0, 1024).
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #4
; Struct buffer format loads: a <4 x i32> operand followed by four i32
; operands, the last of which is marked immarg (must be an immediate).
; The f32 variant is the one called from the tail of @eggs (block bb366).
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #5
; ballot is the only declaration here that uses the convergent group #6.
declare i32 @llvm.amdgcn.ballot.i32(i1) #6
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #5
declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #5
declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #5
; Attribute groups referenced by the declarations, the kernel definition and
; call sites in this file.
; #0: attached to the @llvm.trap declaration.
attributes #0 = { cold noreturn nounwind memory(inaccessiblemem: write) }
; #1: attached to the @llvm.lifetime.end.p5 declaration.
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
; #2: attached to the kernel @eggs. It pins the target to gfx1201 with
; +wavefrontsize32, sets the flat work-group size range to 1..1024, and marks
; a list of implicit kernel inputs as unused via the "amdgpu-no-*" strings.
; NOTE(review): the occupancy expected by this test presumably depends on
; these exact strings - do not reformat or trim them without re-running it.
attributes #2 = { convergent norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1201" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" "uniform-work-group-size"="true" }
; #3: target-independent intrinsics (minnum, maxnum, fma, fabs, sqrt, ctpop).
; Identical to #4 except for the additional nocreateundeforpoison.
attributes #3 = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
; #4: @llvm.amdgcn.rsq.f32 and @llvm.amdgcn.workitem.id.x.
attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; #5: the struct.buffer.load.format.* intrinsics; memory(read) and not
; speculatable.
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(read) }
; #6: @llvm.amdgcn.ballot.i32; convergent, and without nosync.
attributes #6 = { convergent nocallback nofree nounwind willreturn memory(none) }
; #7: NOTE(review): no use is visible in the declarations or in the tail of
; @eggs; presumably a call-site group used earlier in the kernel - confirm
; before removing.
attributes #7 = { convergent nounwind }
; #8: call-site group; used on the @llvm.lifetime.end.p5 call in bb398.
attributes #8 = { nounwind }
; Module-level metadata nodes.
; !0: empty node; used as the operand of !amdgpu.noclobber on the kernel
; argument loads at the top of @eggs.
!0 = !{}
; !1: operand of !noalias.addrspace on the volatile generic-pointer accesses
; in @eggs. Per the LLVM LangRef the values are read as half-open
; address-space ranges, i.e. [1, 3) and [4, 10) here.
!1 = !{i32 1, i32 3, i32 4, i32 10}
; !2 and !4: distinct self-referential nodes that both carry !3.
; NOTE(review): presumably loop IDs attached through !llvm.loop on branches
; earlier in @eggs; their use sites were not checked in this review - confirm.
!2 = distinct !{!2, !3}
; !3: "llvm.loop.peeled.count" property with the value 1.
!3 = !{!"llvm.loop.peeled.count", i32 1}
!4 = distinct !{!4, !3}