| ;; Test callsite context graph generation for call graph with MIBs |
| ;; that have pruned contexts that partially match multiple inlined |
| ;; callsite contexts, requiring duplication of context ids and nodes |
| ;; while matching callsite nodes onto the graph. This test requires more |
| ;; complex duplication due to multiple contexts for different allocations |
| ;; that share some of the same callsite nodes. |
| ;; |
| ;; Original code looks like: |
| ;; |
| ;; char *D(bool Call1) { |
| ;; if (Call1) |
| ;; return new char[10]; |
| ;; else |
| ;; return new char[10]; |
| ;; } |
| ;; |
| ;; char *C(bool Call1) { |
| ;; return D(Call1); |
| ;; } |
| ;; |
| ;; char *B(bool Call1) { |
| ;; if (Call1) |
| ;; return C(true); |
| ;; else |
| ;; return C(false); |
| ;; } |
| ;; |
| ;; char *A(bool Call1) { |
| ;; return B(Call1); |
| ;; } |
| ;; |
| ;; char *A1() { |
| ;; return A(true); |
| ;; } |
| ;; |
| ;; char *A2() { |
| ;; return A(true); |
| ;; } |
| ;; |
| ;; char *A3() { |
| ;; return A(false); |
| ;; } |
| ;; |
| ;; char *A4() { |
| ;; return A(false); |
| ;; } |
| ;; |
| ;; char *E() { |
| ;; return B(true); |
| ;; } |
| ;; |
| ;; char *F() { |
| ;; return B(false); |
| ;; } |
| ;; |
| ;; int main(int argc, char **argv) { |
| ;; char *a1 = A1(); // cold |
| ;; char *a2 = A2(); // cold |
| ;; char *e = E(); // default |
| ;; char *a3 = A3(); // default |
| ;; char *a4 = A4(); // default |
| ;; char *f = F(); // cold |
| ;; memset(a1, 0, 10); |
| ;; memset(a2, 0, 10); |
| ;; memset(e, 0, 10); |
| ;; memset(a3, 0, 10); |
| ;; memset(a4, 0, 10); |
| ;; memset(f, 0, 10); |
| ;; delete[] a3; |
| ;; delete[] a4; |
| ;; delete[] e; |
| ;; sleep(10); |
| ;; delete[] a1; |
| ;; delete[] a2; |
| ;; delete[] f; |
| ;; return 0; |
| ;; } |
| ;; |
| ;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the |
| ;; memory freed after sleep(10) results in cold lifetimes. |
| ;; |
| ;; The code below was created by forcing inlining of A into its callers, |
| ;; without any other inlining or optimizations. Since both allocation contexts |
| ;; via A for each allocation in D have the same allocation type (cold via |
| ;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second |
| ;; new in D), the contexts for those respective allocations are pruned above A. |
| ;; The allocations via E and F are to ensure we don't prune above B. |
| ;; |
| ;; The matching onto the inlined A[1234]->A sequences will require duplication |
| ;; of the context id assigned to the context from A for each allocation in D. |
| ;; This test ensures that we do this correctly in the presence of callsites |
| ;; shared by the different duplicated context ids (i.e. callsite in C). |
| ;; |
| ;; The IR was then reduced using llvm-reduce with the expected FileCheck input. |
| |
| ; RUN: opt -thinlto-bc %s >%t.o |
| ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ |
| ; RUN: -supports-hot-cold-new \ |
| ; RUN: -r=%t.o,main,plx \ |
| ; RUN: -r=%t.o,_Z1Db,plx \ |
| ; RUN: -r=%t.o,_Z1Cb,plx \ |
| ; RUN: -r=%t.o,_Z1Bb,plx \ |
| ; RUN: -r=%t.o,_Z1Ab,plx \ |
| ; RUN: -r=%t.o,_Z2A1v,plx \ |
| ; RUN: -r=%t.o,_Z2A2v,plx \ |
| ; RUN: -r=%t.o,_Z2A3v,plx \ |
| ; RUN: -r=%t.o,_Z2A4v,plx \ |
| ; RUN: -r=%t.o,_Z1Ev,plx \ |
| ; RUN: -r=%t.o,_Z1Fv,plx \ |
| ; RUN: -r=%t.o,_ZdaPv, \ |
| ; RUN: -r=%t.o,sleep, \ |
| ; RUN: -r=%t.o,_Znam, \ |
| ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ |
| ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ |
| ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP |
| |
| |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-unknown-linux-gnu" |
| |
| ;; Reduced D(): contains the two profiled allocation calls to @_Znam. Each |
| ;; call carries !memprof metadata (!0 / !6) holding one "notcold" and one |
| ;; "cold" MIB, plus a !callsite id (!5 / !11) equal to the first stack id of |
| ;; its MIB contexts. |
| define ptr @_Z1Db(i1 %Call1) { |
| entry: |
| ;; First allocation (contexts !2 and !4). |
| %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 |
| br label %return |
| |
| if.else: ; No predecessors! |
| ;; Second allocation (contexts !8 and !10). |
| %call1 = call ptr @_Znam(i64 0), !memprof !6, !callsite !11 |
| br label %return |
| |
| return: ; preds = %if.else, %entry |
| ret ptr null |
| } |
| |
| ;; Allocation function called by both profiled callsites in @_Z1Db. |
| declare ptr @_Znam(i64) |
| |
| ;; Reduced C(): a single call to D. Its !callsite id (!12) is the second |
| ;; stack id in all four MIB contexts (!2, !4, !8, !10), so this callsite is |
| ;; shared by the contexts of both allocations. |
| define ptr @_Z1Cb(i1 %Call1) { |
| entry: |
| %call = call ptr @_Z1Db(i1 false), !callsite !12 |
| ret ptr null |
| } |
| |
| ;; Reduced B(): two calls to C with distinct !callsite ids. !13 is the third |
| ;; stack id in the contexts of the first allocation (!2, !4); !14 is the third |
| ;; stack id in the contexts of the second allocation (!8, !10). |
| define ptr @_Z1Bb(i1 %Call1) { |
| entry: |
| %call = call ptr @_Z1Cb(i1 false), !callsite !13 |
| br label %return |
| |
| if.else: ; No predecessors! |
| %call1 = call ptr @_Z1Cb(i1 false), !callsite !14 |
| br label %return |
| |
| return: ; preds = %if.else, %entry |
| ret ptr null |
| } |
| |
| ;; Out-of-line copy of A: calls B with !callsite !15. That single stack id is |
| ;; the last frame of the cold context !4 (first allocation) and of the |
| ;; notcold context !8 (second allocation). |
| define ptr @_Z1Ab() { |
| entry: |
| %call = call ptr @_Z1Bb(i1 false), !callsite !15 |
| ret ptr null |
| } |
| |
| ;; A1 with A inlined: the call to B carries a two-entry !callsite (!16), |
| ;; consisting of A's callsite id (same value as !15) followed by a second id |
| ;; that appears only in !16. |
| define ptr @_Z2A1v() { |
| entry: |
| %call.i = call ptr @_Z1Bb(i1 false), !callsite !16 |
| ret ptr null |
| } |
| |
| ;; A2 with A inlined: the call to B carries a two-entry !callsite (!17), |
| ;; consisting of A's callsite id (same value as !15) followed by a second id |
| ;; that appears only in !17. |
| define ptr @_Z2A2v() { |
| entry: |
| %call.i = call ptr @_Z1Bb(i1 false), !callsite !17 |
| ret ptr null |
| } |
| |
| ;; A3 with A inlined: the call to B carries a two-entry !callsite (!18), |
| ;; consisting of A's callsite id (same value as !15) followed by a second id |
| ;; that appears only in !18. |
| define ptr @_Z2A3v() { |
| entry: |
| %call.i = call ptr @_Z1Bb(i1 false), !callsite !18 |
| ret ptr null |
| } |
| |
| ;; A4 with A inlined: the call to B carries a two-entry !callsite (!19), |
| ;; consisting of A's callsite id (same value as !15) followed by a second id |
| ;; that appears only in !19. |
| define ptr @_Z2A4v() { |
| entry: |
| %call.i = call ptr @_Z1Bb(i1 false), !callsite !19 |
| ret ptr null |
| } |
| |
| ;; E(): calls B directly. Its !callsite id (!20) is the last frame of the |
| ;; notcold context !2 of the first allocation. |
| define ptr @_Z1Ev() { |
| entry: |
| %call = call ptr @_Z1Bb(i1 false), !callsite !20 |
| ret ptr null |
| } |
| |
| ;; F(): calls B directly. Its !callsite id (!21) is the last frame of the |
| ;; cold context !10 of the second allocation. |
| define ptr @_Z1Fv() { |
| entry: |
| %call = call ptr @_Z1Bb(i1 false), !callsite !21 |
| ret ptr null |
| } |
| |
| ;; Only declarations remain for these symbols in the reduced IR; they are |
| ;; still named in the -r symbol resolutions on the RUN lines. |
| declare i32 @main() |
| |
| declare void @_ZdaPv() |
| |
| declare i32 @sleep() |
| |
| ; uselistorder directives |
| ;; Fixes the order of the two uses of @_Znam (both are in @_Z1Db). |
| uselistorder ptr @_Znam, { 1, 0 } |
| |
| ;; !memprof for the first allocation in @_Z1Db: a notcold MIB (context !2) |
| ;; and a cold MIB (context !4). The contexts differ only in their last id. |
| !0 = !{!1, !3} |
| !1 = !{!2, !"notcold"} |
| !2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} |
| !3 = !{!4, !"cold"} |
| !4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} |
| ;; !callsite of the first allocation call. |
| !5 = !{i64 4854880825882961848} |
| ;; !memprof for the second allocation in @_Z1Db: a notcold MIB (context !8) |
| ;; and a cold MIB (context !10). The contexts differ only in their last id. |
| !6 = !{!7, !9} |
| !7 = !{!8, !"notcold"} |
| !8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} |
| !9 = !{!10, !"cold"} |
| !10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} |
| ;; !callsite of the second allocation call. |
| !11 = !{i64 -8775068539491628272} |
| ;; Single-id !callsite nodes: !12 is used in @_Z1Cb, !13 and !14 in @_Z1Bb, |
| ;; !15 in @_Z1Ab. |
| !12 = !{i64 -904694911315397047} |
| !13 = !{i64 6532298921261778285} |
| !14 = !{i64 7859682663773658275} |
| !15 = !{i64 -6528110295079665978} |
| ;; Two-id !callsite nodes used in @_Z2A1v .. @_Z2A4v: the id from !15 followed |
| ;; by an id unique to each node. |
| !16 = !{i64 -6528110295079665978, i64 5747919905719679568} |
| !17 = !{i64 -6528110295079665978, i64 -5753238080028016843} |
| !18 = !{i64 -6528110295079665978, i64 1794685869326395337} |
| !19 = !{i64 -6528110295079665978, i64 5462047985461644151} |
| ;; !callsite nodes used in @_Z1Ev (!20) and @_Z1Fv (!21). |
| !20 = !{i64 1905834578520680781} |
| !21 = !{i64 -4903163940066524832} |
| |
| |
| ;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only |
| ;; match the interesting parts of the pre-update graph here). |
| |
| ; DUMP: CCG before updating call stack chains: |
| ; DUMP: Callsite Context Graph: |
| |
| ; DUMP: Node [[D1:0x[a-z0-9]+]] |
| ; DUMP: Versions: 1 MIB: |
| ; DUMP: AllocType 1 StackIds: 0, 1, 2 |
| ; DUMP: AllocType 2 StackIds: 0, 1, 3 |
| ; DUMP: (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 1 2 |
| |
| ; DUMP: Node [[C:0x[a-z0-9]+]] |
| ; DUMP: null Call |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 1 2 3 4 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 |
| ; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 |
| |
| ; DUMP: Node [[D2]] |
| ; DUMP: Versions: 1 MIB: |
| ; DUMP: AllocType 1 StackIds: 0, 4, 3 |
| ; DUMP: AllocType 2 StackIds: 0, 4, 5 |
| ; DUMP: (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 3 4 |
| |
| |
| ;; After updating for callsite metadata, we should have duplicated the context |
| ;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A, |
| ;; and used those on new nodes for those callers. Note that while in reality |
| ;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4, |
| ;; due to the pruning we have lost this information and thus end up duplicating |
| ;; both of A's contexts to all of the new nodes (which could result in some |
| ;; unnecessary cloning). |
| |
| ; DUMP: CCG before cloning: |
| ; DUMP: Callsite Context Graph: |
| ; DUMP: Node [[D1]] |
| ; DUMP: Versions: 1 MIB: |
| ; DUMP: AllocType 1 StackIds: 0, 1, 2 |
| ; DUMP: AllocType 2 StackIds: 0, 1, 3 |
| ; DUMP: (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 1 2 5 7 9 11 |
| ; DUMP: CalleeEdges: |
| ; DUMP: CallerEdges: |
| ; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 |
| |
| ; DUMP: Node [[C]] |
| ; DUMP: Callee: 11485875876353461977 (_Z1Db) Clones: 0 StackIds: 0 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 |
| ; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 |
| ; DUMP: CallerEdges: |
| ; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 |
| ; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 |
| |
| ; DUMP: Node [[B1]] |
| ; DUMP: Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 1 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 1 2 5 7 9 11 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 |
| ; DUMP: CallerEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5 |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7 |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9 |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11 |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 |
| |
| ; DUMP: Node [[E]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 2 (clone 0) |
| ; DUMP: AllocTypes: NotCold |
| ; DUMP: ContextIds: 1 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1 |
| ; DUMP: CallerEdges: |
| |
| ; DUMP: Node [[D2]] |
| ; DUMP: Versions: 1 MIB: |
| ; DUMP: AllocType 1 StackIds: 0, 4, 3 |
| ; DUMP: AllocType 2 StackIds: 0, 4, 5 |
| ; DUMP: (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 3 4 6 8 10 12 |
| ; DUMP: CalleeEdges: |
| ; DUMP: CallerEdges: |
| ; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 |
| |
| ; DUMP: Node [[B2]] |
| ; DUMP: Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 4 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 3 4 6 8 10 12 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 |
| ; DUMP: CallerEdges: |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 |
| |
| ; DUMP: Node [[F]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 5 (clone 0) |
| ; DUMP: AllocTypes: Cold |
| ; DUMP: ContextIds: 4 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 |
| ; DUMP: CallerEdges: |
| |
| ; DUMP: Node [[A2]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 7 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 5 6 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 |
| ; DUMP: CallerEdges: |
| |
| ; DUMP: Node [[A3]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 8 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 7 8 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 |
| ; DUMP: CallerEdges: |
| |
| ; DUMP: Node [[A1]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 9 10 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 |
| ; DUMP: CallerEdges: |
| |
| ; DUMP: Node [[A4]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 9 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 11 12 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 |
| ; DUMP: CallerEdges: |
| |
| ; DUMP: Node [[A]] |
| ; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 6 (clone 0) |
| ; DUMP: AllocTypes: NotColdCold |
| ; DUMP: ContextIds: 2 3 |
| ; DUMP: CalleeEdges: |
| ; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 |
| ; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 |
| ; DUMP: CallerEdges: |