[profile] Support counter relocation at runtime
This is an alternative to the continous mode that was implemented in
D68351. This mode relies on padding and the ability to mmap a file over
the existing mapping which is generally only available on POSIX systems
and isn't suitable for other platforms.
This change instead introduces the ability to relocate counters at
runtime using a level of indirection. On every counter access, we add a
bias to the counter address. This bias is stored in a symbol that's
provided by the profile runtime and is initially set to zero, meaning no
relocation. The runtime can mmap the profile into memory at abitrary
location, and set bias to the offset between the original and the new
counter location, at which point every subsequent counter access will be
to the new location, which allows updating profile directly akin to the
continous mode.
The advantage of this implementation is that doesn't require any special
OS support. The disadvantage is the extra overhead due to additional
instructions required for each counter access (overhead both in terms of
binary size and performance) plus duplication of counters (i.e. one copy
in the binary itself and another copy that's mmapped).
Differential Revision: https://reviews.llvm.org/D69740
diff --git a/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll
new file mode 100644
index 0000000..afe5ff6
--- /dev/null
+++ b/llvm/test/Instrumentation/InstrProfiling/runtime-counter-relocation.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -S -instrprof | FileCheck %s
+; RUN: opt < %s -S -instrprof -runtime-counter-relocation | FileCheck -check-prefixes=RELOC %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@__profn_foo = hidden constant [3 x i8] c"foo"
+; RELOC: @__llvm_profile_counter_bias = linkonce_odr global i64 0
+
+; CHECK-LABEL: define void @foo
+; CHECK-NEXT: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i64 0, i64 0)
+; CHECK-NEXT: %1 = add i64 %pgocount, 1
+; CHECK-NEXT: store i64 %1, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i64 0, i64 0)
+; RELOC-LABEL: define void @foo
+; RELOC-NEXT: %1 = load i64, i64* @__llvm_profile_counter_bias
+; RELOC-NEXT: %2 = add i64 ptrtoint ([1 x i64]* @__profc_foo to i64), %1
+; RELOC-NEXT: %3 = inttoptr i64 %2 to i64*
+; RELOC-NEXT: %pgocount = load i64, i64* %3
+; RELOC-NEXT: %4 = add i64 %pgocount, 1
+; RELOC-NEXT: store i64 %4, i64* %3
+define void @foo() {
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+ ret void
+}
+
+declare void @llvm.instrprof.increment(i8*, i64, i32, i32)