[DebugInfo][dexter] Add dexter tests for escaped locals

Recently there has been renewed interest in improving debug-info for variables
that (partially or otherwise) live on the stack in optimised code.

At the moment instcombine speculates that stack slots are probably going to be
promoted to registers, and prepares the debug-info accordingly. It runs a
function called LowerDbgDeclare which converts dbg.declares to a set of
dbg.values after loads, and before stores and calls. Sometimes the stack
location remains (e.g. for escaped locals). If any dbg.values become undef
where the stack location is still valid we end up unnecessarily reducing
variable location coverage due to our inability to track multiple locations
simultaneously. There is a flag to disable this feature
(-instcombine-lower-dbg-declare=0), which prevents this conversion at the cost
of sometimes providing incorrect location info in the face of DSE, DCE, GVN,
CSE etc.

This has been discussed fairly extensively on PR34136.

The idea of these tests is to provide examples of situations that we should
consider when designing a new system, to aid discussions and eventually help
evaluate the implementation.

Dexter isn't ideal for observing specific optimisation behaviour. Writing an
exhaustive test suite would be difficult, and the resultant suite would be
fragile. However, I think having some concrete executable examples is useful
at least as a reference.

Differential Revision: https://reviews.llvm.org/D89543

GitOrigin-RevId: 66d03af88c2e4932ae53787e07f502b90ab220cc
diff --git a/dexter-tests/memvars/bitcast.c b/dexter-tests/memvars/bitcast.c
new file mode 100644
index 0000000..102f91d
--- /dev/null
+++ b/dexter-tests/memvars/bitcast.c
@@ -0,0 +1,77 @@
+// XFAIL:*
+//// Suboptimal coverage, see description below.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder 'clang-c' --cflags "-O3 -glldb" -- %s
+
+//// Adapted from https://bugs.llvm.org/show_bug.cgi?id=34136#c1
+//// LowerDbgDeclare has since been updated to look through bitcasts. We still
+//// get suboptimal coverage at the beginning of 'main' though. For each local,
+//// LowerDbgDeclare inserts a dbg.value and a dbg.value+DW_OP_deref before the
+//// store (after the call to 'getint') and the call to 'alias' respectively.
+//// The first dbg.value describes the result of the 'getint' call, eventually
+//// becoming a register location. The second points back into the stack
+//// home. There is a gap in the coverage between the quickly clobbered register
+//// location and the stack location, even though the stack location is valid
+//// during that gap. For x86 we end up with this code at the start of main:
+//// 00000000004004b0 <main>:
+////   4004b0:  sub    rsp,0x18
+////   4004b4:  mov    edi,0x5
+////   4004b9:  call   400480 <getint>
+////   4004be:  mov    DWORD PTR [rsp+0x14],eax
+////   4004c2:  mov    edi,0x5
+////   4004c7:  call   400480 <getint>
+////   4004cc:  mov    DWORD PTR [rsp+0x10],eax
+////   4004d0:  mov    edi,0x5
+////   4004d5:  call   400480 <getint>
+////   4004da:  mov    DWORD PTR [rsp+0xc],eax
+////   ...
+//// With these variable locations:
+////  DW_TAG_variable
+////    DW_AT_location        (0x00000000:
+////       [0x00000000004004be, 0x00000000004004cc): DW_OP_reg0 RAX
+////       [0x00000000004004de, 0x0000000000400503): DW_OP_breg7 RSP+20)
+////    DW_AT_name    ("x")
+////    ...
+////  DW_TAG_variable
+////    DW_AT_location        (0x00000037:
+////       [0x00000000004004cc, 0x00000000004004da): DW_OP_reg0 RAX
+////       [0x00000000004004e8, 0x0000000000400503): DW_OP_breg7 RSP+16)
+////    DW_AT_name    ("y")
+////    ...
+////  DW_TAG_variable
+////    DW_AT_location        (0x0000006e:
+////       [0x00000000004004da, 0x00000000004004e8): DW_OP_reg0 RAX
+////       [0x00000000004004f2, 0x0000000000400503): DW_OP_breg7 RSP+12)
+////    DW_AT_name    ("z")
+////    ...
+
+char g = 1;
+int five = 5;
+__attribute__((__noinline__))
+int getint(int x) {
+  g = x - 4;
+  return x * g;
+}
+
+__attribute__((__noinline__))
+void alias(char* c) {
+  g = *c;
+  *c = (char)five;
+}
+
+int main() {
+  int x = getint(5);
+  int y = getint(5); // DexLabel('s1')
+  int z = getint(5); // DexLabel('s2')
+  alias((char*)&x);  // DexLabel('s3')
+  alias((char*)&y);
+  alias((char*)&z);
+  return 0;          // DexLabel('s4')
+}
+
+// DexExpectWatchValue('x', '5',  from_line='s1', to_line='s4')
+// DexExpectWatchValue('y', '5',  from_line='s2', to_line='s4')
+// DexExpectWatchValue('z', '5',  from_line='s3', to_line='s4')
diff --git a/dexter-tests/memvars/const-branch.c b/dexter-tests/memvars/const-branch.c
new file mode 100644
index 0000000..afa9bfe
--- /dev/null
+++ b/dexter-tests/memvars/const-branch.c
@@ -0,0 +1,52 @@
+// XFAIL:*
+//// Suboptimal coverage, see inlined comments.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder 'clang-c' --cflags "-O3 -glldb" -- %s
+
+//// Adapted from https://bugs.llvm.org/show_bug.cgi?id=34136#c4
+
+int g;
+
+__attribute__((__noinline__))
+void esc(int* p) {
+  g = *p;
+  *p = 5;
+}
+
+__attribute__((__noinline__))
+void thing(int x) {
+  g = x;
+}
+
+__attribute__((__noinline__))
+int fun(int param) {
+  esc(&param);      //// alloca is live until here        DexLabel('s1')
+  if (param == 0) { //// end of alloca live range
+    //// param is now a constant, but without lowering to dbg.value we can't
+    //// capture that and would still point to the stack slot that may even have
+    //// been reused by now.
+    ////
+    //// Right now we get suboptimal coverage for x86: the param load below is
+    //// CSE'd with the if condition.
+    //// Instcombine runs LowerDbgDeclare and inserts a dbg.value after the load.
+    //// SelectionDAG combines the load and cmp. We go from this IR:
+    ////   %0 = load i32, i32* %param.addr, align 4, !dbg !42, !tbaa !20
+    ////   call void @llvm.dbg.value(metadata i32 %0, ...
+    ////   %cmp = icmp eq i32 %0, 0, !dbg !44
+    //// to this MIR:
+    ////   DBG_VALUE $noreg, $noreg, !"param"...
+    ////   CMP32mi8 %param.addr, 1, $noreg, 0, $noreg, 0, implicit-def $eflags, debug-location !44
+    thing(param);
+  }
+  return 0; //                                            DexLabel('s2')
+}
+
+int main() {
+  return fun(5);
+}
+
+// DexExpectWatchValue('param', '5',  from_line='s1', to_line='s2')
+
diff --git a/dexter-tests/memvars/ctrl-flow.c b/dexter-tests/memvars/ctrl-flow.c
new file mode 100644
index 0000000..80e695d
--- /dev/null
+++ b/dexter-tests/memvars/ctrl-flow.c
@@ -0,0 +1,34 @@
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder clang-c  --cflags "-O2 -glldb" -- %s
+
+//// Check that we give good locations to a variable ('local') which is escaped
+//// down some control paths and not others. This example is handled well currently.
+
+int g;
+__attribute__((__noinline__))
+void leak(int *ptr) {
+  g = *ptr;
+  *ptr = 2;
+}
+
+__attribute__((__noinline__))
+int fun(int cond) {
+  int local = 0;   // DexLabel('s1')
+  if (cond)
+    leak(&local);
+  else
+    local = 1;
+  return local;    // DexLabel('s2')
+}
+
+int main() {
+  int a = fun(1);
+  int b = fun(0);
+  return a + b;
+}
+
+////                           fun(1)  fun(0)
+// DexExpectWatchValue('local',   '0',    '0', on_line='s1')
+// DexExpectWatchValue('local',   '2',    '1', on_line='s2')
diff --git a/dexter-tests/memvars/implicit-ptr.c b/dexter-tests/memvars/implicit-ptr.c
new file mode 100644
index 0000000..92a3984
--- /dev/null
+++ b/dexter-tests/memvars/implicit-ptr.c
@@ -0,0 +1,45 @@
+// XFAIL:*
+//// We don't yet support DW_OP_implicit_pointer in llvm.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder 'clang-c'  --cflags "-O3 -glldb" -- %s
+
+//// Check that 'param' in 'fun' can be read throughout, and that 'pa' and 'pb'
+//// can be dereferenced in the debugger even if we can't provide the pointer
+//// value itself.
+
+int globa;
+int globb;
+
+//// A no-inline, read-only function with internal linkage is a good candidate
+//// for arg promotion.
+__attribute__((__noinline__))
+static void use_promote(const int* pa) {
+  //// Promoted args would be a good candidate for a DW_OP_implicit_pointer.
+  globa = *pa; // DexLabel('s2')
+}
+
+__attribute__((__always_inline__))
+static void use_inline(const int* pb) {
+  //// Inlined pointer to caller local would be a good candidate for a
+  //// DW_OP_implicit_pointer.
+  globb = *pb; // DexLabel('s3')
+}
+
+__attribute__((__noinline__))
+int fun(int param) {
+  volatile int step = 0;   // DexLabel('s1')
+  use_promote(&param);
+  use_inline(&param);
+  return step;             // DexLabel('s4')
+}
+
+int main() {
+  return fun(5);
+}
+
+// DexExpectWatchValue('param', 5, from_line='s1', to_line='s4')
+// DexExpectWatchValue('*pa', 5, on_line='s2')
+// DexExpectWatchValue('*pb', 5, on_line='s3')
diff --git a/dexter-tests/memvars/inlining-dse.c b/dexter-tests/memvars/inlining-dse.c
new file mode 100644
index 0000000..07a65ae
--- /dev/null
+++ b/dexter-tests/memvars/inlining-dse.c
@@ -0,0 +1,52 @@
+// XFAIL:*
+//// See PR47946.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder clang-c  --cflags "-O2 -glldb" -- %s
+//
+//// Check that once-escaped variable 'param' can still be read after we
+//// perform inlining + mem2reg, and that we see the DSE'd value 255.
+
+
+int g;
+__attribute__((__always_inline__))
+static void use(int* p) {
+  g = *p;
+  *p = 255;
+  volatile int step = 0;  // DexLabel('use1')
+}
+
+__attribute__((__noinline__))
+void fun(int param) {
+  //// Make sure first step is in 'fun'.
+  volatile int step = 0;  // DexLabel('fun1')
+  use(&param);
+  return;                 // DexLabel('fun2')
+}
+
+int main() {
+  fun(5);
+}
+
+/*
+# Expect param == 5 before stepping through inlined 'use'.
+DexExpectWatchValue('param', '5', on_line='fun1')
+
+# Expect param == 255 after assignment in inlined frame 'use'.
+DexExpectProgramState({
+  'frames': [
+    { 'function': 'use',
+      'location': { 'lineno': 'use1' },
+    },
+    { 'function': 'fun',
+      'location': { 'lineno': 25 },
+      'watches':  { 'param': '255' }
+    },
+  ]
+})
+
+# Expect param == 255 after inlined call to 'use'.
+DexExpectWatchValue('param', '255', on_line='fun2')
+*/
diff --git a/dexter-tests/memvars/inlining.c b/dexter-tests/memvars/inlining.c
new file mode 100644
index 0000000..757e2ce
--- /dev/null
+++ b/dexter-tests/memvars/inlining.c
@@ -0,0 +1,26 @@
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder clang-c  --cflags "-O2 -glldb" -- %s
+//
+//// Check that the once-escaped variable 'param' can still be read after
+//// we perform inlining + mem2reg. See D89810 and D85555.
+
+int g;
+__attribute__((__always_inline__))
+static void use(int* p) {
+  g = *p;
+}
+
+__attribute__((__noinline__))
+void fun(int param) {
+  volatile int step1 = 0;  // DexLabel('s1')
+  use(&param);
+  volatile int step2 = 0;  // DexLabel('s2')
+}
+
+int main() {
+  fun(5);
+}
+
+// DexExpectWatchValue('param', '5', from_line='s1', to_line='s2')
diff --git a/dexter-tests/memvars/loop.c b/dexter-tests/memvars/loop.c
new file mode 100644
index 0000000..943478c
--- /dev/null
+++ b/dexter-tests/memvars/loop.c
@@ -0,0 +1,56 @@
+// XFAIL: *
+//// Suboptimal coverage, see below.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder 'clang-c'  --cflags "-O3 -glldb" -- %s
+
+//// Check that escaped local 'param' in function 'fun' has sensible debug info
+//// after the escaping function 'use' gets arg promotion (int* -> int). Currently
+//// we lose track of param after the loop header.
+
+int g = 0;
+//// A no-inline, read-only function with internal linkage is a good candidate
+//// for arg promotion.
+__attribute__((__noinline__))
+static void use(const int* p) {
+  //// Promoted args would be a good candidate for a DW_OP_implicit_pointer.
+  //// This desirable behaviour is checked for in the test implicit-ptr.c.
+  g = *p;                             // DexLabel('s1')
+}
+
+__attribute__((__noinline__))
+void do_thing(int x) {
+  g *= x;
+}
+
+__attribute__((__noinline__))
+int fun(int param) {
+  do_thing(0);                        // DexLabel('s2')
+  for (int i = 0; i < param; ++i) {
+    use(&param);
+  }
+
+  //// x86 loop body looks like this, with param in ebx:
+  //// 4004b0: mov    edi,ebx
+  //// 4004b2: call   4004d0 <_ZL3usePKi>
+  //// 4004b7: add    ebp,0xffffffff
+  //// 4004ba: jne    4004b0 <_Z3funi+0x20>
+
+  //// But we lose track of param's location before the loop:
+  //// DW_TAG_formal_parameter
+  //// DW_AT_location   (0x00000039:
+  ////    [0x0000000000400490, 0x0000000000400495): DW_OP_reg5 RDI
+  ////    [0x0000000000400495, 0x00000000004004a2): DW_OP_reg3 RBX)
+  //// DW_AT_name       ("param")
+
+  return g;                           // DexLabel('s3')
+}
+
+int main() {
+  return fun(5);
+}
+
+// DexExpectWatchValue('*p', 5, 5, 5, 5, 5, on_line='s1')
+// DexExpectWatchValue('param', 5, from_line='s2', to_line='s3')
diff --git a/dexter-tests/memvars/ptr-to.c b/dexter-tests/memvars/ptr-to.c
new file mode 100644
index 0000000..143224f
--- /dev/null
+++ b/dexter-tests/memvars/ptr-to.c
@@ -0,0 +1,35 @@
+// XFAIL:*
+//// Currently debug info for 'local' behaves, but 'plocal' dereferences to
+//// the incorrect value 0xFF after the call to esc.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder clang-c --cflags "-O2 -glldb" -- %s
+//
+//// Check that a pointer to a variable living on the stack dereferences to the
+//// variable value.
+
+int glob;
+__attribute__((__noinline__))
+void esc(int* p) {
+  glob = *p;
+  *p = 0xFF;
+}
+
+int main() {
+  int local = 0xA;
+  int *plocal = &local;
+  esc(plocal);      // DexLabel('s1')
+  local = 0xB;      //// DSE
+  return 0;         // DexLabel('s2')
+}
+
+
+// DexExpectWatchValue('local', 0xA, on_line='s1')
+// DexExpectWatchValue('local', 0xB, on_line='s2')
+// DexExpectWatchValue('*plocal', 0xA, on_line='s1')
+// DexExpectWatchValue('*plocal', 0xB, on_line='s2')
+//// Ideally we should be able to observe the dead store to local (0xB) through
+//// plocal here.
+// DexExpectWatchValue('(local == *plocal)', 'true', from_line='s1', to_line='s2')
diff --git a/dexter-tests/memvars/struct-dse.c b/dexter-tests/memvars/struct-dse.c
new file mode 100644
index 0000000..daf3c8d
--- /dev/null
+++ b/dexter-tests/memvars/struct-dse.c
@@ -0,0 +1,33 @@
+// XFAIL:*
+//// Currently, LowerDbgDeclare doesn't lower dbg.declares pointing at allocas
+//// for structs.
+
+// REQUIRES: lldb
+// UNSUPPORTED: system-windows
+// RUN: %dexter --fail-lt 1.0 -w --debugger lldb \
+// RUN:     --builder clang-c --cflags "-O2 -glldb" -- %s
+//
+//// Check that debug-info for the escaped struct variable 'nums' is reasonable.
+
+#include <stdio.h>
+struct Nums { int a, b, c; };
+struct Nums glob;
+__attribute__((__noinline__))
+void esc(struct Nums* nums) {
+  glob = *nums;
+}
+
+__attribute__((__noinline__))
+int main() {
+  struct Nums nums = { .c=1 };       //// Dead store.
+  printf("s1 nums.c: %d\n", nums.c); // DexLabel('s1')
+
+  nums.c = 2;                        //// Killing store.
+  printf("s2 nums.c: %d\n", nums.c); // DexLabel('s2')
+
+  esc(&nums);                        //// Force nums to live on the stack.
+  return 0;                          // DexLabel('s3')
+}
+
+// DexExpectWatchValue('nums.c', '1', on_line='s1')
+// DexExpectWatchValue('nums.c', '2', from_line='s2', to_line='s3')