;; blob: 9a72ae43b2f1e485907b6f5779e7efd82a3fb698 [file] [log] [blame]
;; Test context disambiguation for a callgraph containing multiple memprof
;; contexts and no inlining, where we need to perform additional cloning
;; during function assignment/cloning to handle the combination of contexts
;; to 2 different allocations.
;;
;; void E(char **buf1, char **buf2) {
;; *buf1 = new char[10];
;; *buf2 = new char[10];
;; }
;;
;; void B(char **buf1, char **buf2) {
;; E(buf1, buf2);
;; }
;;
;; void C(char **buf1, char **buf2) {
;; E(buf1, buf2);
;; }
;;
;; void D(char **buf1, char **buf2) {
;; E(buf1, buf2);
;; }
;; int main(int argc, char **argv) {
;; char *cold1, *cold2, *default1, *default2, *default3, *default4;
;; B(&default1, &default2);
;; C(&default3, &cold1);
;; D(&cold2, &default4);
;; memset(cold1, 0, 10);
;; memset(cold2, 0, 10);
;; memset(default1, 0, 10);
;; memset(default2, 0, 10);
;; memset(default3, 0, 10);
;; memset(default4, 0, 10);
;; delete[] default1;
;; delete[] default2;
;; delete[] default3;
;; delete[] default4;
;; sleep(10);
;; delete[] cold1;
;; delete[] cold2;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
;; -stats requires asserts
; REQUIRES: asserts
; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
; RUN: -r=%t.o,_ZdaPv, \
; RUN: -r=%t.o,sleep, \
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -thinlto-distributed-indexes \
; RUN: -r=%t.o,main,plx \
; RUN: -r=%t.o,_ZdaPv, \
; RUN: -r=%t.o,sleep, \
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN: --check-prefix=STATS
;; Run ThinLTO backend
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-import-summary=%t.o.thinlto.bc \
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
; RUN: %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN: --check-prefix=STATS-BE --check-prefix=REMARKS
;; Module header: source file name, then the data layout string and target
;; triple (x86_64-unknown-linux-gnu) that the module is built for.
source_filename = "funcassigncloning.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; E: holds the two profiled allocation calls. Each call to @_Znam carries
;; !memprof metadata listing three contexts (one cold, two notcold) and a
;; !callsite id equal to the first stack id of each of those contexts.
;; The pointer parameters are unused in this reduced body.
; Function Attrs: noinline optnone
define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 {
entry:
;; First new: cold only in the context whose caller stack id is !18 (the call
;; in @_Z1DPPcS0_); notcold for the ids in !16 and !17.
%call = call ptr @_Znam(i64 noundef 10), !memprof !0, !callsite !7
;; Second new: cold only in the context whose caller stack id is !17 (the call
;; in @_Z1CPPcS0_); notcold for the ids in !16 and !18.
%call1 = call ptr @_Znam(i64 noundef 10), !memprof !8, !callsite !15
ret void
}
;; Allocation function called twice from @_Z1EPPcS0_ (the `new char[10]`
;; expressions in the C++ source shown at the top of the file).
declare ptr @_Znam(i64)
;; B: calls E through callsite stack id !16. Both allocation profiles in E list
;; the context through this id as notcold (!5/!6 and !9/!10).
define internal void @_Z1BPPcS0_() {
entry:
call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !16
ret void
}
;; C: calls E through callsite stack id !17. For this id the first allocation
;; in E is notcold (!3/!4) and the second allocation is cold (!11/!12).
define internal void @_Z1CPPcS0_() {
entry:
call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !17
ret void
}
;; D: calls E through callsite stack id !18. For this id the first allocation
;; in E is cold (!1/!2) and the second allocation is notcold (!13/!14).
define internal void @_Z1DPPcS0_() {
entry:
call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !18
ret void
}
;; main: calls B, C and D once each, in that order, and returns 0. These calls
;; carry no !callsite metadata, so the profiled contexts end at B, C and D.
; Function Attrs: noinline optnone
define i32 @main() #0 {
entry:
call void @_Z1BPPcS0_()
call void @_Z1CPPcS0_()
call void @_Z1DPPcS0_()
ret i32 0
}
;; Declared but never called in this reduced module; both symbols are still
;; named in the -r= resolution options of the llvm-lto2 invocations above.
declare void @_ZdaPv()
declare i32 @sleep()
; uselistorder directives
;; Fixes the use-list order of @_Znam; its two uses are the calls in
;; @_Z1EPPcS0_.
uselistorder ptr @_Znam, { 1, 0 }
;; Attribute group #0 is attached to @_Z1EPPcS0_ and @main.
attributes #0 = { noinline optnone }
;; Profile for the first allocation in E (!memprof !0). Each entry pairs a
;; call stack (allocation callsite id first, then the caller id) with an
;; allocation type string.
!0 = !{!1, !3, !5}
;; Reached through the callsite in D (!18): cold.
!1 = !{!2, !"cold"}
!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
;; Reached through the callsite in C (!17): notcold.
!3 = !{!4, !"notcold"}
!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
;; Reached through the callsite in B (!16): notcold.
!5 = !{!6, !"notcold"}
!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
;; Callsite id of the first allocation call in E.
!7 = !{i64 -3461278137325233666}
;; Profile for the second allocation in E (!memprof !8), same layout as !0.
!8 = !{!9, !11, !13}
;; Reached through the callsite in B (!16): notcold.
!9 = !{!10, !"notcold"}
!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
;; Reached through the callsite in C (!17): cold.
!11 = !{!12, !"cold"}
!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
;; Reached through the callsite in D (!18): notcold.
!13 = !{!14, !"notcold"}
!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
;; Callsite id of the second allocation call in E.
!15 = !{i64 -1415475215210681400}
;; Callsite ids of the calls to E in B (!16), C (!17) and D (!18).
!16 = !{i64 -2441057035866683071}
!17 = !{i64 -3483158674395044949}
!18 = !{i64 -7799663586031895603}
;; Originally we create a single clone of each call to new from E, since each
;; allocates cold memory for a single caller.
; DUMP: CCG after cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 2 StackIds: 0
; DUMP: AllocType 1 StackIds: 1
; DUMP: AllocType 1 StackIds: 2
; DUMP: (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 2 3
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
; DUMP: Node [[D:0x[a-z0-9]+]]
; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 6
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
; DUMP: Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
; DUMP: CallerEdges:
; DUMP: Node [[C]]
; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 2 5
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
; DUMP: Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
; DUMP: CallerEdges:
; DUMP: Node [[B]]
; DUMP: Callee: 10758063066234039248 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 3 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
; DUMP: CallerEdges:
; DUMP: Node [[ENEW2ORIG]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 1 StackIds: 2
; DUMP: AllocType 2 StackIds: 1
; DUMP: AllocType 1 StackIds: 0
; DUMP: (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 4 6
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
; DUMP: Clones: [[ENEW2CLONE]]
; DUMP: Node [[ENEW1CLONE]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 2 StackIds: 0
; DUMP: AllocType 1 StackIds: 1
; DUMP: AllocType 1 StackIds: 2
; DUMP: (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
; DUMP: Clone of [[ENEW1ORIG]]
; DUMP: Node [[ENEW2CLONE]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 1 StackIds: 2
; DUMP: AllocType 2 StackIds: 1
; DUMP: AllocType 1 StackIds: 0
; DUMP: (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 5
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
; DUMP: Clone of [[ENEW2ORIG]]
;; We greedily create a clone of E that is initially used by the clones of the
;; first call to new. However, we end up with an incompatible set of callers
;; given the second call to new which has clones with a different combination of
;; callers. Eventually, we create 2 more clones, and the first clone becomes dead.
; REMARKS: created clone _Z1EPPcS0_.memprof.1
; REMARKS: created clone _Z1EPPcS0_.memprof.2
; REMARKS: created clone _Z1EPPcS0_.memprof.3
; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3
; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2
;; Original version of E is used for the non-cold allocations, both from B.
; IR: define internal {{.*}} @_Z1EPPcS0_(
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define internal {{.*}} @_Z1BPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_(
;; C calls a clone of E with the first new allocating non-cold memory and the
;; second allocating cold memory.
; IR: define internal {{.*}} @_Z1CPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_.memprof.3(
;; D calls a clone of E with the first new allocating cold memory and the
;; second allocating non-cold memory.
; IR: define internal {{.*}} @_Z1DPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_.memprof.2(
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2(
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3(
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }
; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
; STATS-BE: 8 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
; STATS-BE: 1 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
; STATS-BE: 4 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend