[libunwind] Support stack unwind in CET environment

Control-flow Enforcement Technology (CET), published by Intel,
introduces shadow stack feature aiming to ensure a return from
a function is directed to where the function was called.
In a CET enabled system, each function call will push return
address into normal stack and shadow stack, when the function
returns, the address stored in shadow stack will be popped and
compared with the return address, program will fail if the 2
addresses don't match.
In exception handling, the control flow may skip some stack frames
and we must adjust shadow stack to avoid violating CET restriction.
In order to achieve this, we count the number of stack frames skipped
and adjust shadow stack by this number before jumping to landing pad.

Reviewed By: hjl.tools, compnerd, MaskRay
Differential Revision: https://reviews.llvm.org/D105968

Signed-off-by: gejin <ge.jin@intel.com>
GitOrigin-RevId: 21b25a1fb32ecd2e1f336123c2715f8ef1a49f97
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a73f5b0..b601738 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,6 +52,7 @@
 
 # Define options.
 option(LIBUNWIND_BUILD_32_BITS "Build 32 bit libunwind" ${LLVM_BUILD_32_BITS})
+option(LIBUNWIND_ENABLE_CET "Build libunwind with CET enabled." OFF)
 option(LIBUNWIND_ENABLE_ASSERTIONS "Enable assertions independent of build mode." ON)
 option(LIBUNWIND_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LIBUNWIND_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
@@ -93,6 +94,10 @@
   message(FATAL_ERROR "libunwind must be built as either a shared or static library.")
 endif()
 
+if (LIBUNWIND_ENABLE_CET AND MSVC)
+  message(FATAL_ERROR "libunwind CET support is not available for MSVC!")
+endif()
+
 # Check that we can build with 32 bits if requested.
 if (CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32)
   if (LIBUNWIND_BUILD_32_BITS AND NOT LLVM_BUILD_32_BITS) # Don't duplicate the output from LLVM
@@ -176,6 +181,17 @@
 
 add_compile_flags_if_supported(-Werror=return-type)
 
+if (LIBUNWIND_ENABLE_CET)
+  add_compile_flags_if_supported(-fcf-protection=full)
+  add_compile_flags_if_supported(-mshstk)
+  if (NOT LIBUNWIND_SUPPORTS_FCF_PROTECTION_EQ_FULL_FLAG)
+    message(SEND_ERROR "Compiler doesn't support CET -fcf-protection option!")
+  endif()
+  if (NOT LIBUNWIND_SUPPORTS_MSHSTK_FLAG)
+    message(SEND_ERROR "Compiler doesn't support CET -mshstk option!")
+  endif()
+endif()
+
 # Get warning flags
 add_compile_flags_if_supported(-W)
 add_compile_flags_if_supported(-Wall)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 67fa61b..5794038 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -34,6 +34,7 @@
     AddressSpace.hpp
     assembly.h
     CompactUnwinder.hpp
+    cet_unwind.h
     config.h
     dwarf2.h
     DwarfInstructions.hpp
diff --git a/src/Registers.hpp b/src/Registers.hpp
index 0699743..5e2f11f 100644
--- a/src/Registers.hpp
+++ b/src/Registers.hpp
@@ -15,8 +15,9 @@
 #include <stdint.h>
 #include <string.h>
 
-#include "libunwind.h"
+#include "cet_unwind.h"
 #include "config.h"
+#include "libunwind.h"
 
 namespace libunwind {
 
@@ -42,6 +43,13 @@
 #if defined(_LIBUNWIND_TARGET_I386)
 class _LIBUNWIND_HIDDEN Registers_x86;
 extern "C" void __libunwind_Registers_x86_jumpto(Registers_x86 *);
+
+#if defined(_LIBUNWIND_USE_CET)
+extern "C" void *__libunwind_cet_get_jump_target() {
+  return reinterpret_cast<void *>(&__libunwind_Registers_x86_jumpto);
+}
+#endif
+
 /// Registers_x86 holds the register state of a thread in a 32-bit intel
 /// process.
 class _LIBUNWIND_HIDDEN Registers_x86 {
@@ -253,6 +261,13 @@
 /// process.
 class _LIBUNWIND_HIDDEN Registers_x86_64;
 extern "C" void __libunwind_Registers_x86_64_jumpto(Registers_x86_64 *);
+
+#if defined(_LIBUNWIND_USE_CET)
+extern "C" void *__libunwind_cet_get_jump_target() {
+  return reinterpret_cast<void *>(&__libunwind_Registers_x86_64_jumpto);
+}
+#endif
+
 class _LIBUNWIND_HIDDEN Registers_x86_64 {
 public:
   Registers_x86_64();
diff --git a/src/UnwindCursor.hpp b/src/UnwindCursor.hpp
index 8373b61..7157fa9 100644
--- a/src/UnwindCursor.hpp
+++ b/src/UnwindCursor.hpp
@@ -11,6 +11,7 @@
 #ifndef __UNWINDCURSOR_HPP__
 #define __UNWINDCURSOR_HPP__
 
+#include "cet_unwind.h"
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -449,6 +450,12 @@
 #ifdef __arm__
   virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); }
 #endif
+
+#if defined(_LIBUNWIND_USE_CET)
+  virtual void *get_registers() {
+    _LIBUNWIND_ABORT("get_registers not implemented");
+  }
+#endif
 };
 
 #if defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32)
@@ -901,6 +908,9 @@
   virtual void        saveVFPAsX();
 #endif
 
+#if defined(_LIBUNWIND_USE_CET)
+  virtual void *get_registers() { return &_registers; }
+#endif
   // libunwind does not and should not depend on C++ library which means that we
   // need our own defition of inline placement new.
   static void *operator new(size_t, UnwindCursor<A, R> *p) { return p; }
@@ -2125,6 +2135,12 @@
                                          buf, bufLen, offset);
 }
 
+#if defined(_LIBUNWIND_USE_CET)
+extern "C" void *__libunwind_cet_get_registers(unw_cursor_t *cursor) {
+  AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor;
+  return co->get_registers();
+}
+#endif
 } // namespace libunwind
 
 #endif // __UNWINDCURSOR_HPP__
diff --git a/src/UnwindLevel1.c b/src/UnwindLevel1.c
index 8b8797f..9203ac7 100644
--- a/src/UnwindLevel1.c
+++ b/src/UnwindLevel1.c
@@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <string.h>
 
+#include "cet_unwind.h"
 #include "config.h"
 #include "libunwind.h"
 #include "libunwind_ext.h"
@@ -34,6 +35,38 @@
 
 #ifndef _LIBUNWIND_SUPPORT_SEH_UNWIND
 
+// When CET is enabled, each "call" instruction will push return address to
+// CET shadow stack, each "ret" instruction will pop current CET shadow stack
+// top and compare it with target address which program will return.
+// In exception handing, some stack frames will be skipped before jumping to
+// landing pad and we must adjust CET shadow stack accordingly.
+// _LIBUNWIND_POP_CET_SSP is used to adjust CET shadow stack pointer and we
+// directly jump to __libunwind_Registerts_x86/x86_64_jumpto instead of using
+// a regular function call to avoid pushing to CET shadow stack again.
+#if !defined(_LIBUNWIND_USE_CET)
+#define __unw_phase2_resume(cursor, fn) __unw_resume((cursor))
+#elif defined(_LIBUNWIND_TARGET_I386)
+#define __unw_phase2_resume(cursor, fn)                                        \
+  do {                                                                         \
+    _LIBUNWIND_POP_CET_SSP((fn));                                              \
+    void *cetRegContext = __libunwind_cet_get_registers((cursor));             \
+    void *cetJumpAddress = __libunwind_cet_get_jump_target();                  \
+    __asm__ volatile("push %%edi\n\t"                                          \
+                     "sub $4, %%esp\n\t"                                       \
+                     "jmp *%%edx\n\t" :: "D"(cetRegContext),                   \
+                     "d"(cetJumpAddress));                                     \
+  } while (0)
+#elif defined(_LIBUNWIND_TARGET_X86_64)
+#define __unw_phase2_resume(cursor, fn)                                        \
+  do {                                                                         \
+    _LIBUNWIND_POP_CET_SSP((fn));                                              \
+    void *cetRegContext = __libunwind_cet_get_registers((cursor));             \
+    void *cetJumpAddress = __libunwind_cet_get_jump_target();                  \
+    __asm__ volatile("jmpq *%%rdx\n\t" :: "D"(cetRegContext),                  \
+                     "d"(cetJumpAddress));                                     \
+  } while (0)
+#endif
+
 static _Unwind_Reason_Code
 unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) {
   __unw_init_local(cursor, uc);
@@ -137,6 +170,9 @@
   _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)",
                              (void *)exception_object);
 
+  // uc is initialized by __unw_getcontext in the parent frame. The first stack
+  // frame walked is unwind_phase2.
+  unsigned framesWalked = 1;
   // Walk each frame until we reach where search phase said to stop.
   while (true) {
 
@@ -188,6 +224,7 @@
     }
 #endif
 
+    ++framesWalked;
     // If there is a personality routine, tell it we are unwinding.
     if (frameInfo.handler != 0) {
       _Unwind_Personality_Fn p =
@@ -227,8 +264,9 @@
                                      ", sp=0x%" PRIxPTR,
                                      (void *)exception_object, pc, sp);
         }
-        __unw_resume(cursor);
-        // __unw_resume() only returns if there was an error.
+
+        __unw_phase2_resume(cursor, framesWalked);
+        // __unw_phase2_resume() only returns if there was an error.
         return _URC_FATAL_PHASE2_ERROR;
       default:
         // Personality routine returned an unknown result code.
@@ -250,6 +288,9 @@
                      _Unwind_Stop_Fn stop, void *stop_parameter) {
   __unw_init_local(cursor, uc);
 
+  // uc is initialized by __unw_getcontext in the parent frame. The first stack
+  // frame walked is unwind_phase2_forced.
+  unsigned framesWalked = 1;
   // Walk each frame until we reach where search phase said to stop
   while (__unw_step(cursor) > 0) {
 
@@ -296,6 +337,7 @@
       return _URC_FATAL_PHASE2_ERROR;
     }
 
+    ++framesWalked;
     // If there is a personality routine, tell it we are unwinding.
     if (frameInfo.handler != 0) {
       _Unwind_Personality_Fn p =
@@ -320,7 +362,7 @@
                                    "_URC_INSTALL_CONTEXT",
                                    (void *)exception_object);
         // We may get control back if landing pad calls _Unwind_Resume().
-        __unw_resume(cursor);
+        __unw_phase2_resume(cursor, framesWalked);
         break;
       default:
         // Personality routine returned an unknown result code.
diff --git a/src/UnwindRegistersRestore.S b/src/UnwindRegistersRestore.S
index c2106f3..955ec33 100644
--- a/src/UnwindRegistersRestore.S
+++ b/src/UnwindRegistersRestore.S
@@ -25,6 +25,8 @@
 #  + return address        +
 #  +-----------------------+   <-- SP
 #  +                       +
+
+  _LIBUNWIND_CET_ENDBR
   movl   4(%esp), %eax
   # set up eax and ret on new stack location
   movl  28(%eax), %edx # edx holds new stack pointer
@@ -46,7 +48,8 @@
   # skip ss
   # skip eflags
   pop    %eax  # eax was already pushed on new stack
-  ret        # eip was already pushed on new stack
+  pop    %ecx
+  jmp    *%ecx
   # skip cs
   # skip ds
   # skip es
@@ -70,6 +73,7 @@
 # On entry, thread_state pointer is in rdi
 #endif
 
+  _LIBUNWIND_CET_ENDBR
   movq  56(%rdi), %rax # rax holds new stack pointer
   subq  $16, %rax
   movq  %rax, 56(%rdi)
@@ -119,7 +123,8 @@
 #endif
   movq  56(%rdi), %rsp  # cut back rsp to new location
   pop    %rdi      # rdi was saved here earlier
-  ret            # rip was saved here
+  pop    %rcx
+  jmpq   *%rcx
 
 
 #elif defined(__powerpc64__)
diff --git a/src/UnwindRegistersSave.S b/src/UnwindRegistersSave.S
index f66dc53..e565c8f 100644
--- a/src/UnwindRegistersSave.S
+++ b/src/UnwindRegistersSave.S
@@ -27,6 +27,8 @@
 #   +                       +
 #
 DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
+
+  _LIBUNWIND_CET_ENDBR
   push  %eax
   movl  8(%esp), %eax
   movl  %ebx,  4(%eax)
@@ -70,6 +72,7 @@
 #define TMP %rsi
 #endif
 
+  _LIBUNWIND_CET_ENDBR
   movq  %rax,   (PTR)
   movq  %rbx,  8(PTR)
   movq  %rcx, 16(PTR)
diff --git a/src/assembly.h b/src/assembly.h
index 76ef825..e38d323 100644
--- a/src/assembly.h
+++ b/src/assembly.h
@@ -15,6 +15,13 @@
 #ifndef UNWIND_ASSEMBLY_H
 #define UNWIND_ASSEMBLY_H
 
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux__)
+#include <cet.h>
+#define _LIBUNWIND_CET_ENDBR _CET_ENDBR
+#else
+#define _LIBUNWIND_CET_ENDBR
+#endif
+
 #if defined(__powerpc64__)
 #define SEPARATOR ;
 #define PPC64_OFFS_SRR0   0
diff --git a/src/cet_unwind.h b/src/cet_unwind.h
new file mode 100644
index 0000000..eac0bf1
--- /dev/null
+++ b/src/cet_unwind.h
@@ -0,0 +1,40 @@
+//===--------------------------- cet_unwind.h -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBUNWIND_CET_UNWIND_H
+#define LIBUNWIND_CET_UNWIND_H
+
+#include "libunwind.h"
+#include <cet.h>
+#include <immintrin.h>
+
+// Currently, CET is implemented on Linux x86 platforms.
+#if defined(_LIBUNWIND_TARGET_LINUX) && defined(__CET__) && defined(__SHSTK__)
+#define _LIBUNWIND_USE_CET 1
+#endif
+
+#if defined(_LIBUNWIND_USE_CET)
+#define _LIBUNWIND_POP_CET_SSP(x)                                              \
+  do {                                                                         \
+    unsigned long ssp = _get_ssp();                                            \
+    if (ssp != 0) {                                                            \
+      unsigned int tmp = (x);                                                  \
+      while (tmp > 255) {                                                      \
+        _inc_ssp(255);                                                         \
+        tmp -= 255;                                                            \
+      }                                                                        \
+      _inc_ssp(tmp);                                                           \
+    }                                                                          \
+  } while (0)
+#endif
+
+extern void *__libunwind_cet_get_registers(unw_cursor_t *);
+extern void *__libunwind_cet_get_jump_target();
+
+#endif
diff --git a/src/libunwind.cpp b/src/libunwind.cpp
index 1faf000..93e1bc1 100644
--- a/src/libunwind.cpp
+++ b/src/libunwind.cpp
@@ -11,8 +11,8 @@
 
 #include <libunwind.h>
 
-#include "libunwind_ext.h"
 #include "config.h"
+#include "libunwind_ext.h"
 
 #include <stdlib.h>
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ae83ea9..932a6e3 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,6 +12,7 @@
 endif()
 
 pythonize_bool(LIBUNWIND_BUILD_32_BITS)
+pythonize_bool(LIBUNWIND_ENABLE_CET)
 pythonize_bool(LIBCXX_ENABLE_SHARED)
 pythonize_bool(LIBUNWIND_ENABLE_SHARED)
 pythonize_bool(LIBUNWIND_ENABLE_THREADS)
diff --git a/test/libunwind/test/config.py b/test/libunwind/test/config.py
index 18919c2..2aa3b82 100644
--- a/test/libunwind/test/config.py
+++ b/test/libunwind/test/config.py
@@ -50,6 +50,8 @@
         if not self.get_lit_bool('enable_threads', True):
             self.cxx.compile_flags += ['-D_LIBUNWIND_HAS_NO_THREADS']
             self.config.available_features.add('libunwind-no-threads')
+        if self.get_lit_bool('x86_cet', False):
+            self.cxx.compile_flags += ['-fcf-protection=full']
         super(Configuration, self).configure_compile_flags()
 
     def configure_compile_flags_header_includes(self):
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index ef2ebd9..be98070 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -27,6 +27,7 @@
 config.sysroot                  = "@LIBUNWIND_SYSROOT@"
 config.gcc_toolchain            = "@LIBUNWIND_GCC_TOOLCHAIN@"
 config.cxx_ext_threads          = @LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY@
+config.x86_cet                  = @LIBUNWIND_ENABLE_CET@
 
 site.addsitedir(os.path.join(config.libunwind_src_root, 'test'))
 site.addsitedir(os.path.join(config.libcxx_src_root, 'utils'))