; blob: 145b3b90c939e0205d1c2a936974ad1514816b03 [file] [log] [blame] [edit]
; RUN: opt %s -strip-debug -o %t.no_debug.ll -S
; RUN: llc -O3 -mcpu=gfx1250 < %s -filetype=obj -o %t.with_debug.o
; RUN: llc -O3 -mcpu=gfx1250 < %t.no_debug.ll -filetype=obj -o %t.no_debug.o
; RUN: llvm-strip %t.with_debug.o %t.no_debug.o
; RUN: cmp %t.with_debug.o %t.no_debug.o
; Ensure that compiling with and without debug info generates identical code.
; Test that revertScheduling only updates LiveIntervals if non-debug
; instructions are reordered.
; The triple is set in the module, so the llc RUN lines only pass -mcpu.
target triple = "amdgcn-amd-amdhsa"
; Barrier intrinsic; called once, in the if.end82 block of the kernel below.
declare void @llvm.amdgcn.s.barrier() #0
; Kernel compiled by the RUN lines above, once as written and once after
; -strip-debug. It contains exactly one debug record (the #dbg_value in the
; entry block); everything else is address arithmetic, loads, vector math and
; stores. The instruction order is part of the test and must not be changed.
define amdgpu_kernel void @_test_revertScheduling(i32 %lda, ptr addrspace(1) %infoA.coerce, i32 %add.ptr13.idx, ptr addrspace(3) %add.ptr10, i32 %i0, i1 %cmp59.not, ptr addrspace(3) %arrayidx53, ptr addrspace(3) %arrayidx75.3.6, <2 x float> %f1, float %f2, <2 x float> %f3, <14 x float> %f4) {
entry:
%cond13.in.i10.i.i.i = load i16, ptr addrspace(4) null, align 2
%f5 = tail call i32 @llvm.amdgcn.workitem.id.x()
; The only debug record in the function. It describes variable !4 ("info") and
; refers to %add.ptr, which is not defined until the if.end82 block.
#dbg_value(ptr addrspace(1) %add.ptr, !4, !DIExpression(), !13)
; Address computation: %arrayidx.K = null + (K * %lda + %f5) floats in
; addrspace(1), for K = 0..13. K = 0 zero-extends %f5; the others sign-extend
; the 32-bit sum. Multiplications by 2, 4 and 8 are written as shifts.
%idxprom = zext i32 %f5 to i64
%arrayidx = getelementptr float, ptr addrspace(1) null, i64 %idxprom
%add16.1 = add i32 %lda, %f5
%idxprom.1 = sext i32 %add16.1 to i64
%arrayidx.1 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.1
%mul15.2 = shl i32 %lda, 1
%add16.2 = add i32 %mul15.2, %f5
%idxprom.2 = sext i32 %add16.2 to i64
%arrayidx.2 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.2
%mul15.3 = mul i32 %lda, 3
%add16.3 = add i32 %mul15.3, %f5
%idxprom.3 = sext i32 %add16.3 to i64
%arrayidx.3 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.3
%mul15.4 = shl i32 %lda, 2
%add16.4 = add i32 %mul15.4, %f5
%idxprom.4 = sext i32 %add16.4 to i64
%arrayidx.4 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.4
%mul15.5 = mul i32 %lda, 5
%add16.5 = add i32 %mul15.5, %f5
%idxprom.5 = sext i32 %add16.5 to i64
%arrayidx.5 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.5
%mul15.6 = mul i32 %lda, 6
%add16.6 = add i32 %mul15.6, %f5
%idxprom.6 = sext i32 %add16.6 to i64
%arrayidx.6 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.6
%mul15.7 = mul i32 %lda, 7
%add16.7 = add i32 %mul15.7, %f5
%idxprom.7 = sext i32 %add16.7 to i64
%arrayidx.7 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.7
%mul15.8 = shl i32 %lda, 3
%add16.8 = add i32 %mul15.8, %f5
%idxprom.8 = sext i32 %add16.8 to i64
%arrayidx.8 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.8
%mul15.9 = mul i32 %lda, 9
%add16.9 = add i32 %mul15.9, %f5
%idxprom.9 = sext i32 %add16.9 to i64
%arrayidx.9 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.9
%mul15.10 = mul i32 %lda, 10
%add16.10 = add i32 %mul15.10, %f5
%idxprom.10 = sext i32 %add16.10 to i64
%arrayidx.10 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.10
%mul15.11 = mul i32 %lda, 11
%add16.11 = add i32 %mul15.11, %f5
%idxprom.11 = sext i32 %add16.11 to i64
%arrayidx.11 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.11
%mul15.12 = mul i32 %lda, 12
%add16.12 = add i32 %mul15.12, %f5
%idxprom.12 = sext i32 %add16.12 to i64
%arrayidx.12 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.12
%mul15.13 = mul i32 %lda, 13
%add16.13 = add i32 %mul15.13, %f5
%idxprom.13 = sext i32 %add16.13 to i64
%arrayidx.13 = getelementptr float, ptr addrspace(1) null, i64 %idxprom.13
; %cmp54 has no later use in this function; only its .1-.4 siblings do.
%f6 = load float, ptr addrspace(1) %arrayidx.13, align 4
%cmp54 = fcmp oeq float %f6, 0.000000e+00
br i1 %cmp59.not, label %if.end82, label %for.body71.preheader
for.body71.preheader: ; preds = %entry
; The values defined in this block (%f7, %mul65, %f8) are not used outside it.
%f7 = load float, ptr addrspace(1) %arrayidx, align 4
%mul65 = fmul float %f7, 0.000000e+00
%f8 = insertelement <14 x float> zeroinitializer, float %mul65, i64 0
br label %if.end82
if.end82: ; preds = %for.body71.preheader, %entry
; Main block: addrspace(3) loads and stores mixed with <2 x float> and
; <14 x float> arithmetic, with a barrier, an acquire fence and a release
; fence in between.
%f9 = load <2 x float>, ptr addrspace(3) null, align 8
tail call void @llvm.amdgcn.s.barrier()
%f10 = load float, ptr addrspace(3) %add.ptr10, align 4
%f11 = load float, ptr addrspace(3) null, align 4
fence acquire
%f12 = load float, ptr addrspace(3) %add.ptr10, align 4
%f13 = load float, ptr addrspace(3) %arrayidx53, align 4
%cmp54.2 = fcmp oeq float %f10, 0.000000e+00
%cmp54.1 = fcmp une float %f13, 0.000000e+00
%spec.select.1 = select i1 %cmp54.1, i32 %add16.13, i32 0
%spec.select.2 = select i1 %cmp54.2, i32 0, i32 %spec.select.1
%mul65.3 = fmul float 0.000000e+00, %f2
%f14 = insertelement <2 x float> zeroinitializer, float %mul65.3, i64 0
%f15 = fmul <2 x float> zeroinitializer, %f9
%f16 = shufflevector <2 x float> %f15, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f17 = shufflevector <14 x float> zeroinitializer, <14 x float> %f16, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15>
%f18 = insertelement <14 x float> %f17, float %f11, i64 13
%f19 = shufflevector <14 x float> %f18, <14 x float> zeroinitializer, <2 x i32> <i32 12, i32 13>
%f20 = load <2 x float>, ptr addrspace(3) null, align 8
%f21 = fmul contract <2 x float> zeroinitializer, %f20
store float 0.000000e+00, ptr addrspace(3) null, align 4
%f22 = load <2 x float>, ptr addrspace(3) %arrayidx75.3.6, align 8
%f23 = fmul <2 x float> zeroinitializer, %f22
%f24 = shufflevector <2 x float> %f23, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f25 = shufflevector <14 x float> %f4, <14 x float> %f24, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 14, i32 15, i32 poison, i32 poison>
%f26 = fsub contract <2 x float> %f19, %f21
%f27 = shufflevector <2 x float> %f26, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f28 = shufflevector <14 x float> %f25, <14 x float> %f27, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15>
%f29 = extractelement <14 x float> %f28, i64 0
%f30 = load <2 x float>, ptr addrspace(3) null, align 4
%f31 = shufflevector <14 x float> %f28, <14 x float> zeroinitializer, <2 x i32> <i32 11, i32 12>
%f32 = fsub <2 x float> zeroinitializer, %f14
%f33 = fmul contract <2 x float> %f32, zeroinitializer
%f34 = fsub contract <2 x float> %f31, %f33
fence release
; From here on the addrspace(3) loads use constant addresses (32, 24, 48, 40)
; as well as the pointer arguments.
%f35 = load <2 x float>, ptr addrspace(3) inttoptr (i32 32 to ptr addrspace(3)), align 8
%f36 = fmul contract <2 x float> zeroinitializer, %f35
%f37 = load <2 x float>, ptr addrspace(3) inttoptr (i32 24 to ptr addrspace(3)), align 8
%f38 = fmul contract <2 x float> %f3, %f37
%f39 = fsub contract <2 x float> %f33, %f38
%f40 = shufflevector <2 x float> %f39, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f41 = shufflevector <14 x float> zeroinitializer, <14 x float> %f40, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f42 = fsub contract <2 x float> %f1, %f36
%f43 = shufflevector <2 x float> %f42, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f44 = shufflevector <14 x float> %f41, <14 x float> %f43, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison>
%f45 = shufflevector <14 x float> %f44, <14 x float> zeroinitializer, <2 x i32> <i32 7, i32 8>
%f46 = fmul contract <2 x float> zeroinitializer, %f3
%f47 = fsub contract <2 x float> %f45, %f46
%f48 = load <2 x float>, ptr addrspace(3) inttoptr (i32 48 to ptr addrspace(3)), align 8
%f49 = fsub <2 x float> %f34, %f48
%f50 = fmul <2 x float> zeroinitializer, %f30
%f51 = shufflevector <2 x float> %f47, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f52 = shufflevector <14 x float> %f44, <14 x float> %f51, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f53 = fsub <2 x float> %f49, %f50
%f54 = shufflevector <2 x float> %f53, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%f55 = shufflevector <14 x float> %f52, <14 x float> %f54, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 14, i32 15, i32 poison>
%f56 = extractelement <14 x float> %f55, i64 7
%mul65.7 = fmul float %f56, 0.000000e+00
%f57 = load <2 x float>, ptr addrspace(3) inttoptr (i32 40 to ptr addrspace(3)), align 8
store float 0.000000e+00, ptr addrspace(3) %add.ptr10, align 4
%f58 = insertelement <2 x float> zeroinitializer, float %mul65.7, i64 0
%f59 = shufflevector <14 x float> %f55, <14 x float> zeroinitializer, <2 x i32> <i32 10, i32 11>
%f60 = fmul <2 x float> %f58, %f57
%f61 = fsub <2 x float> %f59, %f60
%f62 = load <2 x float>, ptr addrspace(3) %arrayidx53, align 4
%f63 = fsub <2 x float> %f61, %f62
%f64 = shufflevector <2 x float> %f63, <2 x float> zeroinitializer, <14 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%cmp59.13 = icmp ugt i32 %f5, 0
%f65 = shufflevector <14 x float> zeroinitializer, <14 x float> %f64, <14 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 14, i32 15, i32 poison, i32 poison, i32 poison>
%promotealloca.13 = select i1 %cmp59.13, <14 x float> zeroinitializer, <14 x float> %f65
%cmp54.3 = fcmp oeq float %f12, 0.000000e+00
%cmp54.4 = fcmp oeq float %f29, 0.000000e+00
%spec.select.3 = select i1 %cmp54.3, i32 0, i32 %spec.select.2
%spec.select.4 = select i1 %cmp54.4, i32 0, i32 %spec.select.3
; %add.ptr, the value named by the #dbg_value in the entry block, is defined
; here: %infoA.coerce indexed by (zext of the i16 loaded in entry) | workitem.id.y.
%conv.i.i = zext i16 %cond13.in.i10.i.i.i to i32
%f66 = tail call i32 @llvm.amdgcn.workitem.id.y()
%add = or i32 %conv.i.i, %f66
%conv.i = sext i32 %add to i64
%add.ptr = getelementptr i32, ptr addrspace(1) %infoA.coerce, i64 %conv.i
; Final addrspace(1) stores: the select chain result to %add.ptr, then one
; float to each of %arrayidx .. %arrayidx.12 and one to null.
store i32 %spec.select.4, ptr addrspace(1) %add.ptr, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.1, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.2, align 4
store float %mul65.3, ptr addrspace(1) %arrayidx.3, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.4, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.5, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.6, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.7, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.8, align 4
%f67 = extractelement <14 x float> %promotealloca.13, i64 9
store float %f67, ptr addrspace(1) %arrayidx.9, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.10, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.11, align 4
store float 0.000000e+00, ptr addrspace(1) %arrayidx.12, align 4
store float 0.000000e+00, ptr addrspace(1) null, align 4
ret void
}
; Work-item id intrinsics called by the kernel; both results are declared
; noundef with range [0, 1024).
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #1
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.y() #1
; #0 is attached to the s.barrier declaration, #1 to the workitem.id declarations.
attributes #0 = { convergent nocallback nofree nounwind willreturn }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; Debug-info metadata. The first RUN line's -strip-debug produces the
; no-debug copy of this module that the test compares against.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3}
; !0: the single compile unit, with full debug emission.
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "AMD clang version 22.0.0git (ssh://github-emu/AMD-Lightning-Internal/llvm-project 25425 c51a87b7a53a3e8f308402aaffa3ecbc2953305a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.cpp", directory: "/tmp", checksumkind: CSK_MD5, checksum: "cc205700bf3536fe4ff21a07daf7e01d")
!2 = !{}
!3 = !{i32 2, !"Debug Info Version", i32 3}
; !4: the local variable "info" described by the kernel's #dbg_value record.
!4 = !DILocalVariable(name: "info", scope: !5, file: !6, line: 162, type: !11)
; !5: subprogram whose linkageName matches the kernel @_test_revertScheduling;
; it is the scope of both the variable (!4) and the location (!13).
!5 = distinct !DISubprogram(name: "test_revertScheduling", linkageName: "_test_revertScheduling", scope: !7, file: !6, line: 142, type: !9, scopeLine: 150, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, templateParams: !2, retainedNodes: !2)
!6 = !DIFile(filename: "kernels.hpp", directory: "/tmp")
!7 = !DINamespace(name: "v33200", scope: !8, exportSymbols: true)
!8 = !DINamespace(name: "solve", scope: null)
!9 = distinct !DISubroutineType(types: !10)
!10 = !{null}
; !11/!12: the variable's type, a 64-bit pointer to a 32-bit signed "int".
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64)
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
; !13: the line-0 location attached to the #dbg_value record.
!13 = !DILocation(line: 0, scope: !5)