| ; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_BEFORE |
| ; RUN: opt < %s -passes='loop(loop-rotate),print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_AFTER |
| ; RUN: opt < %s -passes='loop(loop-rotate)' -S | FileCheck %s --check-prefixes=IR |
| |
| ; Global written by the volatile stores in every loop body of this file. |
| @g = global i32 0 |
| |
| ; We should get the same "count =" results for "outer_loop_body" and |
| ; "inner_loop_body" before and after the transformation. |
| |
| ; BFI_BEFORE-LABEL: block-frequency-info: func0 |
| ; BFI_BEFORE: - entry: {{.*}} count = 1 |
| ; BFI_BEFORE: - outer_loop_header: {{.*}} count = 1001 |
| ; BFI_BEFORE: - outer_loop_body: {{.*}} count = 1000 |
| ; BFI_BEFORE: - inner_loop_header: {{.*}} count = 4000 |
| ; BFI_BEFORE: - inner_loop_body: {{.*}} count = 3000 |
| ; BFI_BEFORE: - inner_loop_exit: {{.*}} count = 1000 |
| ; BFI_BEFORE: - outer_loop_exit: {{.*}} count = 1 |
| |
| ; BFI_AFTER-LABEL: block-frequency-info: func0 |
| ; BFI_AFTER: - entry: {{.*}} count = 1 |
| ; BFI_AFTER: - outer_loop_body: {{.*}} count = 1000 |
| ; BFI_AFTER: - inner_loop_body: {{.*}} count = 3000 |
| ; BFI_AFTER: - inner_loop_exit: {{.*}} count = 1000 |
| ; BFI_AFTER: - outer_loop_exit: {{.*}} count = 1 |
| |
| ; IR-LABEL: define void @func0 |
| ; IR: inner_loop_body: |
| ; IR: br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof [[PROF_FUNC0_0:![0-9]+]] |
| ; IR: inner_loop_exit: |
| ; IR: br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof [[PROF_FUNC0_1:![0-9]+]] |
| ; |
| ; A function with known loop-bounds where after loop-rotation we end with an |
| ; unconditional branch in the pre-header. |
| ; Two nested counted loops with constant bounds: the outer loop runs while |
| ; %i0 < 1000 and the inner loop while %i1 < 3. Each loop body performs a |
| ; volatile store of its induction variable to @g. |
| define void @func0() !prof !0 { |
| entry: |
| br label %outer_loop_header |
| |
| ; Outer header: !1 gives weight 1000 to the body edge and 1 to the exit edge. |
| outer_loop_header: |
| %i0 = phi i32 [0, %entry], [%i0_inc, %inner_loop_exit] |
| %cmp0 = icmp slt i32 %i0, 1000 |
| br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof !1 |
| |
| outer_loop_body: |
| store volatile i32 %i0, ptr @g, align 4 |
| br label %inner_loop_header |
| |
| ; Inner header: !2 gives weight 3000 to the body edge and 1000 to the exit edge. |
| inner_loop_header: |
| %i1 = phi i32 [0, %outer_loop_body], [%i1_inc, %inner_loop_body] |
| %cmp1 = icmp slt i32 %i1, 3 |
| br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof !2 |
| |
| inner_loop_body: |
| store volatile i32 %i1, ptr @g, align 4 |
| %i1_inc = add i32 %i1, 1 |
| br label %inner_loop_header |
| |
| ; Leaving the inner loop is also the latch of the outer loop: it increments |
| ; %i0 and branches back to the outer header. |
| inner_loop_exit: |
| %i0_inc = add i32 %i0, 1 |
| br label %outer_loop_header |
| |
| outer_loop_exit: |
| ret void |
| } |
| |
| ; BFI_BEFORE-LABEL: block-frequency-info: func1 |
| ; BFI_BEFORE: - entry: {{.*}} count = 1024 |
| ; BFI_BEFORE: - loop_header: {{.*}} count = 21504 |
| ; BFI_BEFORE: - loop_body: {{.*}} count = 20480 |
| ; BFI_BEFORE: - loop_exit: {{.*}} count = 1024 |
| |
| ; BFI_AFTER-LABEL: block-frequency-info: func1 |
| ; BFI_AFTER: - entry: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_body: {{.*}} count = 20608 |
| ; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_exit: {{.*}} count = 1024 |
| |
| ; IR-LABEL: define void @func1 |
| ; IR: entry: |
| ; IR: br i1 %cmp1, label %loop_body.lr.ph, label %loop_exit, !prof [[PROF_FUNC1_0:![0-9]+]] |
| |
| ; IR: loop_body: |
| ; IR: br i1 %cmp, label %loop_body, label %loop_header.loop_exit_crit_edge, !prof [[PROF_FUNC1_1:![0-9]+]] |
| |
| ; A function with unknown loop-bounds so loop-rotation ends up with a |
| ; conditional branch in both the pre-header and the loop body. The branch |
| ; weights show that the backedge count is higher than the loop-exit count. |
| ; Single loop bounded by the argument %n: the body runs while %i < %n and |
| ; performs a volatile store of %i to @g on each iteration. |
| define void @func1(i32 %n) !prof !3 { |
| entry: |
| br label %loop_header |
| |
| ; The true edge enters the body; !4 weights body:exit as 40:2. |
| loop_header: |
| %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| %cmp = icmp slt i32 %i, %n |
| br i1 %cmp, label %loop_body, label %loop_exit, !prof !4 |
| |
| loop_body: |
| store volatile i32 %i, ptr @g, align 4 |
| %i_inc = add i32 %i, 1 |
| br label %loop_header |
| |
| loop_exit: |
| ret void |
| } |
| |
| ; BFI_BEFORE-LABEL: block-frequency-info: func2 |
| ; BFI_BEFORE: - entry: {{.*}} count = 1024 |
| ; BFI_BEFORE: - loop_header: {{.*}} count = 1056 |
| ; BFI_BEFORE: - loop_body: {{.*}} count = 32 |
| ; BFI_BEFORE: - loop_exit: {{.*}} count = 1024 |
| |
| ; BFI_AFTER-LABEL: block-frequency-info: func2 |
| ; BFI_AFTER: - entry: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 32 |
| ; BFI_AFTER: - loop_body: {{.*}} count = 32 |
| ; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 32 |
| ; BFI_AFTER: - loop_exit: {{.*}} count = 1024 |
| |
| ; IR-LABEL: define void @func2 |
| ; IR: entry: |
| ; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC2_0:![0-9]+]] |
| |
| ; IR: loop_body: |
| ; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC2_1:![0-9]+]] |
| |
| ; A function with unknown loop-bounds so loop-rotation ends up with a |
| ; conditional branch in both the pre-header and the loop body. Similar to |
| ; `func1`, but here the loop-exit count is higher than the backedge count. |
| ; Same loop shape as func1, but with the header branch inverted: the true |
| ; edge of %cmp leaves the loop and the false edge enters the body. |
| define void @func2(i32 %n) !prof !3 { |
| entry: |
| br label %loop_header |
| |
| ; !5 weights exit:body as 10240:320, so the exit edge dominates. |
| loop_header: |
| %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| %cmp = icmp slt i32 %i, %n |
| br i1 %cmp, label %loop_exit, label %loop_body, !prof !5 |
| |
| loop_body: |
| store volatile i32 %i, ptr @g, align 4 |
| %i_inc = add i32 %i, 1 |
| br label %loop_header |
| |
| loop_exit: |
| ret void |
| } |
| |
| ; BFI_BEFORE-LABEL: block-frequency-info: func3_zero_branch_weight |
| ; BFI_BEFORE: - entry: {{.*}} count = 1024 |
| ; BFI_BEFORE: - loop_header: {{.*}} count = 2199023255296 |
| ; BFI_BEFORE: - loop_body: {{.*}} count = 2199023254272 |
| ; BFI_BEFORE: - loop_exit: {{.*}} count = 1024 |
| |
| ; BFI_AFTER-LABEL: block-frequency-info: func3_zero_branch_weight |
| ; BFI_AFTER: - entry: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_body: {{.*}} count = 2199023255296 |
| ; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024 |
| ; BFI_AFTER: - loop_exit: {{.*}} count = 1024 |
| |
| ; IR-LABEL: define void @func3_zero_branch_weight |
| ; IR: entry: |
| ; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC3_0:![0-9]+]] |
| |
| ; IR: loop_body: |
| ; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC3_0]] |
| |
| ; Same loop shape as func2 (true edge of %cmp exits the loop), but the |
| ; header branch carries a zero weight on the exit edge. |
| define void @func3_zero_branch_weight(i32 %n) !prof !3 { |
| entry: |
| br label %loop_header |
| |
| ; !6 weights exit:body as 0:1. |
| loop_header: |
| %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| %cmp = icmp slt i32 %i, %n |
| br i1 %cmp, label %loop_exit, label %loop_body, !prof !6 |
| |
| loop_body: |
| store volatile i32 %i, ptr @g, align 4 |
| %i_inc = add i32 %i, 1 |
| br label %loop_header |
| |
| loop_exit: |
| ret void |
| } |
| |
| ; IR-LABEL: define void @func4_zero_branch_weight |
| ; IR: entry: |
| ; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC4_0:![0-9]+]] |
| |
| ; IR: loop_body: |
| ; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC4_0]] |
| |
| ; Same loop shape as func2 (true edge of %cmp exits the loop), but the |
| ; header branch carries a zero weight on the loop-body edge. |
| define void @func4_zero_branch_weight(i32 %n) !prof !3 { |
| entry: |
| br label %loop_header |
| |
| ; !7 weights exit:body as 1:0. |
| loop_header: |
| %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| %cmp = icmp slt i32 %i, %n |
| br i1 %cmp, label %loop_exit, label %loop_body, !prof !7 |
| |
| loop_body: |
| store volatile i32 %i, ptr @g, align 4 |
| %i_inc = add i32 %i, 1 |
| br label %loop_header |
| |
| loop_exit: |
| ret void |
| } |
| |
| ; IR-LABEL: define void @func5_zero_branch_weight |
| ; IR: entry: |
| ; IR: br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC5_0:![0-9]+]] |
| |
| ; IR: loop_body: |
| ; IR: br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC5_0]] |
| |
| ; Same loop shape as func2 (true edge of %cmp exits the loop), but both |
| ; weights on the header branch are zero. |
| define void @func5_zero_branch_weight(i32 %n) !prof !3 { |
| entry: |
| br label %loop_header |
| |
| ; !8 weights exit:body as 0:0. |
| loop_header: |
| %i = phi i32 [0, %entry], [%i_inc, %loop_body] |
| %cmp = icmp slt i32 %i, %n |
| br i1 %cmp, label %loop_exit, label %loop_body, !prof !8 |
| |
| loop_body: |
| store volatile i32 %i, ptr @g, align 4 |
| %i_inc = add i32 %i, 1 |
| br label %loop_header |
| |
| loop_exit: |
| ret void |
| } |
| |
| ; Input profile metadata. Branch weights are listed in successor order |
| ; (true edge first, then false edge). |
| ; !0: entry count of func0. |
| !0 = !{!"function_entry_count", i64 1} |
| ; !1: func0 outer header (body edge, exit edge). |
| !1 = !{!"branch_weights", i32 1000, i32 1} |
| ; !2: func0 inner header (body edge, exit edge). |
| !2 = !{!"branch_weights", i32 3000, i32 1000} |
| ; !3: entry count shared by func1, func2 and the *_zero_branch_weight functions. |
| !3 = !{!"function_entry_count", i64 1024} |
| ; !4: func1 header (body edge, exit edge). |
| !4 = !{!"branch_weights", i32 40, i32 2} |
| ; !5: func2 header (exit edge, body edge). |
| !5 = !{!"branch_weights", i32 10240, i32 320} |
| ; !6: func3_zero_branch_weight header (exit edge, body edge). |
| !6 = !{!"branch_weights", i32 0, i32 1} |
| ; !7: func4_zero_branch_weight header (exit edge, body edge). |
| !7 = !{!"branch_weights", i32 1, i32 0} |
| ; !8: func5_zero_branch_weight header (exit edge, body edge). |
| !8 = !{!"branch_weights", i32 0, i32 0} |
| |
| ; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000} |
| ; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1} |
| ; IR: [[PROF_FUNC1_0]] = !{!"branch_weights", i32 127, i32 1} |
| ; IR: [[PROF_FUNC1_1]] = !{!"branch_weights", i32 2433, i32 127} |
| ; IR: [[PROF_FUNC2_0]] = !{!"branch_weights", i32 9920, i32 320} |
| ; IR: [[PROF_FUNC2_1]] = !{!"branch_weights", i32 320, i32 0} |
| ; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1} |
| ; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0} |
| ; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0} |