| ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 | opt -passes='print<block-freq>' -disable-output -use-iterative-bfi-inference 2>&1 | FileCheck %s |
| ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 -S | FileCheck %s --check-prefix=CHECK2 |
| ; RUN: opt < %s -passes='print<block-freq>' -use-iterative-bfi-inference -disable-output 2>&1 | FileCheck %s --check-prefix=CHECK3 |
| |
| ; The C++ code for this test case is from c-parse.c in 403.gcc (SPEC2006) |
| ; The problem with BFI for the test is solved by applying iterative inference. |
| ; The corresponding CFG graph is shown below, with intended counts for every |
| ; basic block. The hot loop, b3->b4->b2, is not getting proper (large) counts |
| ; unless the -use-iterative-bfi-inference option is specified. |
| ; |
| ; +-------------------------------------------+ |
| ; | | |
| ; | +----------+ | |
| ; | | b1 [1] | | |
| ; | +----------+ | |
| ; | | | |
| ; | | | |
| ; | v | |
| ; | +----------+ | |
| ; | +------------> | b2 [625] | -+ | |
| ; | | +----------+ | | |
| ; | | | | | |
| ; | | | | | |
| ; | | v | | |
| ; | +----------+ +----------+ | | |
| ; | | b4 [624] | <-- | b3 [625] | <+---------+ |
| ; | +----------+ +----------+ | |
| ; | | | |
| ; +----+ | | |
| ; | v v |
| ; +----------+ +--------------------+ |
| ; | b8 [1] | <-- | b7 [2] | |
| ; +----------+ +--------------------+ |
| ; | ^ |
| ; | | |
| ; v | |
| ; +----------+ +----------+ | |
| ; | b9 [1] | <-- | b5 [2] | | |
| ; +----------+ +----------+ | |
| ; | | |
| ; | | |
| ; v | |
| ; +----------+ | |
| ; | b6 [1] | -+ |
| ; +----------+ |
| |
| @yydebug = dso_local global i32 0, align 4 |
| |
| ; Function Attrs: noinline nounwind uwtable |
| define dso_local i32 @yyparse_1() #0 { |
| b1: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 1, i32 0, i64 -1) |
| %0 = load i32, ptr @yydebug, align 4 |
| %cmp = icmp ne i32 %0, 0 |
| br label %b2 |
| ; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 1 |
| |
| b2: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 2, i32 0, i64 -1) |
| br i1 %cmp, label %b7, label %b3 |
| ; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 625 |
| |
| b3: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 3, i32 0, i64 -1) |
| br i1 %cmp, label %b7, label %b4 |
| ; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 625 |
| ; CHECK2: br i1 %cmp, label %b7, label %b4, |
| ; CHECK2-SAME: !prof ![[END172_PROF:[0-9]+]] |
| |
| b4: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 4, i32 0, i64 -1) |
| br label %b2 |
| ; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 624 |
| |
| b5: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 5, i32 0, i64 -1) |
| br i1 %cmp, label %b9, label %b6 |
| ; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 2 |
| |
| b6: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 6, i32 0, i64 -1) |
| br label %b7 |
| ; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1 |
| |
| b7: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 7, i32 0, i64 -1) |
| br i1 %cmp, label %b5, label %b8 |
| ; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 2 |
| ; CHECK2: br i1 %cmp, label %b5, label %b8, |
| ; CHECK2-SAME: !prof ![[FALSE4858_PROF:[0-9]+]] |
| |
| b8: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 8, i32 0, i64 -1) |
| br label %b3 |
| ; CHECK: - b8: float = {{.*}}, int = {{.*}}, count = 1 |
| |
| b9: |
| call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 9, i32 0, i64 -1) |
| %1 = load i32, ptr @yydebug, align 4 |
| ret i32 %1 |
| ; CHECK: - b9: float = {{.*}}, int = {{.*}}, count = 1 |
| |
| } |
| |
| ; Another difficult (for BFI) instance with irreducible loops, |
| ; containing 'indirectbr'. The corresponding CFG graph is shown below, with |
| ; intended counts for every basic block. |
| ; |
| ; +-----------+ |
| ; | b1 [1] | |
| ; +-----------+ |
| ; | |
| ; | |
| ; v |
| ; +------------------------+ |
| ; +- | b2 [86] | <+ |
| ; | +------------------------+ | |
| ; | | | | |
| ; | | | | |
| ; | v | | |
| ; | +-----------+ | | |
| ; | | b3 [8212] | <+-------+ | |
| ; | +-----------+ | | | |
| ; | | | | | |
| ; | | | | | |
| ; | v v | | |
| ; | +------------------------+ | |
| ; | | indirectgoto [17747] | -+ |
| ; | +------------------------+ |
| ; | | ^ | |
| ; | | +--+ |
| ; | v |
| ; | +-----------+ |
| ; +> | b4 [1] | |
| ; +-----------+ |
| |
| ; Function Attrs: nounwind uwtable |
| define dso_local i32 @foo1() #0 !prof !132 { |
| b1: |
| call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1) |
| %0 = load i32, ptr @yydebug, align 4 |
| %cmp = icmp ne i32 %0, 0 |
| br label %b2 |
| ; CHECK3: - b1: float = {{.*}}, int = {{.*}}, count = 1 |
| |
| b2: |
| call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1) |
| %1 = load i32, ptr @yydebug, align 4 |
| switch i32 %1, label %b4 [ |
| i32 1, label %indirectgoto |
| i32 2, label %b3 |
| ], !prof !133 |
| ; CHECK3: - b2: float = {{.*}}, int = {{.*}}, count = 86 |
| |
| b3: |
| call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1) |
| br label %indirectgoto |
| ; CHECK3: - b3: float = {{.*}}, int = {{.*}}, count = 8212 |
| |
| b4: |
| call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1) |
| %2 = load i32, ptr @yydebug, align 4 |
| ret i32 %2 |
| ; CHECK3: - b4: float = {{.*}}, int = {{.*}}, count = 1 |
| |
| indirectgoto: |
| %indirect.goto.dest = alloca i8, align 4 |
| call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1) |
| indirectbr ptr %indirect.goto.dest, [label %b2, label %indirectgoto, label %b4, label %b3], !prof !134 |
| ; CHECK3: - indirectgoto: float = {{.*}}, int = {{.*}}, count = 17747 |
| |
| } |
| |
| declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 |
| |
| attributes #0 = { noinline nounwind uwtable "use-sample-profile"} |
| attributes #1 = { nounwind } |
| |
| !llvm.pseudo_probe_desc = !{!1079, !4496} |
| !1079 = !{i64 -7702751003264189226, i64 158496288380146391, !"yyparse_1", null} |
| !4496 = !{i64 7682762345278052905, i64 404850113186107133, !"foo1", null} |
| !132 = !{!"function_entry_count", i64 1} |
| !133 = !{!"branch_weights", i32 0, i32 86, i32 0} |
| !134 = !{!"branch_weights", i32 85, i32 9449, i32 1, i32 8212} |
| |
| ; CHECK2: ![[END172_PROF]] = !{!"branch_weights", i32 1, i32 1003} |
| ; CHECK2: ![[FALSE4858_PROF]] = !{!"branch_weights", i32 2, i32 1} |