| ; This test is similar to cgdata-read-double-outline.ll, but it is executed under LTO (Link Time Optimization), covering both ThinLTO and regular (full) LTO. |
| ; It demonstrates how identical instruction sequences are handled during global outlining. |
| ; Currently, we do not attempt to reuse an outlined function for identical sequences. |
| ; Instead, each instruction sequence that appears in the global outlined hash tree |
| ; is outlined into its own unique function. |
| |
| ; RUN: split-file %s %t |
| |
| ; We first create the cgdata file from a local outline instance in local-two.ll |
| ; RUN: opt -module-summary %t/local-two.ll -o %t/write.bc |
| ; RUN: llvm-lto2 run %t/write.bc -o %t/write \ |
| ; RUN: -r %t/write.bc,_f1,px -r %t/write.bc,_f2,px -r %t/write.bc,_g,p \ |
| ; RUN: -codegen-data-generate=true |
| ; RUN: llvm-cgdata --merge %t/write.1 -o %t_cgdata |
| ; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW |
| |
| ; SHOW: Outlined hash tree: |
| ; SHOW-NEXT: Total Node Count: 4 |
| ; SHOW-NEXT: Terminal Node Count: 1 |
| ; SHOW-NEXT: Depth: 3 |
| |
| ; Now, we run both ThinLTO and LTO on local-two-another.ll, passing the cgdata generated above. |
| ; With ThinLTO, similar to the no-LTO scenario shown in cgdata-read-double-outline.ll, |
| ; it optimistically outlines each instruction sequence that matches against |
| ; the global outlined hash tree. Since each matching sequence is considered a candidate, |
| ; we expect to generate two unique outlined functions that will be folded |
| ; by the linker at a later stage. |
| ; However, with LTO, we do not utilize the cgdata, but instead fall back to the default |
| ; outliner mode. This results in a single outlined function that is |
| ; shared across two call-sites. |
| |
| ; Run ThinLTO |
| ; RUN: opt -module-summary %t/local-two-another.ll -o %t/thinlto.bc |
| ; RUN: llvm-lto2 run %t/thinlto.bc -o %t/thinlto \ |
| ; RUN: -r %t/thinlto.bc,_f3,px -r %t/thinlto.bc,_f4,px -r %t/thinlto.bc,_g,p \ |
| ; RUN: -codegen-data-use-path=%t_cgdata |
| ; RUN: llvm-objdump -d %t/thinlto.1 | FileCheck %s |
| |
| ; CHECK: _OUTLINED_FUNCTION_{{.*}}: |
| ; CHECK-NEXT: mov |
| ; CHECK-NEXT: mov |
| ; CHECK-NEXT: b |
| ; CHECK: _OUTLINED_FUNCTION_{{.*}}: |
| ; CHECK-NEXT: mov |
| ; CHECK-NEXT: mov |
| ; CHECK-NEXT: b |
| |
| ; Run ThinLTO while disabling the global outliner. |
| ; With the default outliner, we expect a single outlined function. |
| ; RUN: llvm-lto2 run %t/thinlto.bc -o %t/thinlto-disable \ |
| ; RUN: -r %t/thinlto.bc,_f3,px -r %t/thinlto.bc,_f4,px -r %t/thinlto.bc,_g,p \ |
| ; RUN: -enable-machine-outliner \ |
| ; RUN: -codegen-data-use-path=%t_cgdata \ |
| ; RUN: -disable-global-outlining |
| ; RUN: llvm-objdump -d %t/thinlto-disable.1 | FileCheck %s --check-prefix=DISABLE |
| |
| ; DISABLE: _OUTLINED_FUNCTION_{{.*}}: |
| ; DISABLE-NEXT: mov |
| ; DISABLE-NEXT: mov |
| ; DISABLE-NEXT: b |
| ; DISABLE-NOT: _OUTLINED_FUNCTION_{{.*}}: |
| |
| ; Run LTO, which effectively disables the global outliner. |
| ; RUN: opt %t/local-two-another.ll -o %t/lto.bc |
| ; RUN: llvm-lto2 run %t/lto.bc -o %t/lto \ |
| ; RUN: -r %t/lto.bc,_f3,px -r %t/lto.bc,_f4,px -r %t/lto.bc,_g,p \ |
| ; RUN: -enable-machine-outliner \ |
| ; RUN: -codegen-data-use-path=%t_cgdata |
| ; RUN: llvm-objdump -d %t/lto.0 | FileCheck %s --check-prefix=DISABLE |
| |
| ;--- local-two.ll |
| ; Input module for the -codegen-data-generate run above (the cgdata is produced from it). |
| ; It defines f1 and f2, which call @g with the same two trailing arguments (1, 2) |
| ; and differ only in the first argument. |
| target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" |
| target triple = "arm64-apple-darwin" |
| ; @g is only declared in this module; no definition is provided here. |
| declare i32 @g(i32, i32, i32) |
| ; f1: calls @g(10, 1, 2) and returns the call's result unchanged. |
| define i32 @f1() minsize { |
| entry: |
|   %ret = call i32 @g(i32 10, i32 1, i32 2) |
|   ret i32 %ret |
| } |
| ; f2: calls @g(20, 1, 2) and returns the call's result unchanged. |
| ; Only the first argument differs from f1. |
| define i32 @f2() minsize { |
| entry: |
|   %ret = call i32 @g(i32 20, i32 1, i32 2) |
|   ret i32 %ret |
| } |
| |
| ;--- local-two-another.ll |
| ; Input module for the ThinLTO and LTO runs above that pass -codegen-data-use-path. |
| ; It defines f3 and f4, which call @g with the same two trailing arguments (1, 2) |
| ; as f1/f2 in local-two.ll, but with first arguments 30 and 40. |
| target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" |
| target triple = "arm64-apple-darwin" |
| |
| ; @g is only declared in this module; no definition is provided here. |
| declare i32 @g(i32, i32, i32) |
| ; f3: calls @g(30, 1, 2) and returns the call's result unchanged. |
| define i32 @f3() minsize { |
| entry: |
|   %ret = call i32 @g(i32 30, i32 1, i32 2) |
|   ret i32 %ret |
| } |
| ; f4: calls @g(40, 1, 2) and returns the call's result unchanged. |
| ; Only the first argument differs from f3. |
| define i32 @f4() minsize { |
| entry: |
|   %ret = call i32 @g(i32 40, i32 1, i32 2) |
|   ret i32 %ret |
| } |