[X86] Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk

This addresses the feature request from https://github.com/ClangBuiltLinux/linux/issues/1665. When the ``indirect_branch_cs_prefix`` module flag is set, emit a ``cs`` prefix in front of a call or jmp to an indirect thunk (recognized by the implicit use of ``%r11``), and in front of the jmp to the return thunk emitted by the X86 return-thunks pass.
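
For illustration (not part of this patch), a minimal reproducer under the
usual upstream names: with the module flag that the updated tests use and
retpoline lowering enabled, the indirect call is routed through the R11
thunk and now carries a leading ``cs`` prefix. The triple, target feature,
and thunk symbol below are assumed from existing upstream conventions:

    ; llc -mtriple=x86_64-unknown-linux-gnu example.ll
    define void @f(ptr %fp) #0 {
      ; Expected lowering with this patch: "cs" then "callq __llvm_retpoline_r11".
      call void %fp()
      ret void
    }

    attributes #0 = { "target-features"="+retpoline-indirect-calls" }

    !llvm.module.flags = !{!0}
    !0 = !{i32 4, !"indirect_branch_cs_prefix", i32 1}

On the Clang side, this module flag is emitted when the driver is passed
``-mindirect-branch-cs-prefix``, matching GCC's option of the same name.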

Reviewed By: nickdesaulniers, MaskRay

Differential Revision: https://reviews.llvm.org/D130754

GitOrigin-RevId: 6f867f9102838ebe314c1f3661fdf95700386e5a
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 0972e80..76ddb3d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -2440,6 +2440,9 @@
   if (OutStreamer->isVerboseAsm())
     addConstantComments(MI, *OutStreamer);
 
+  bool IndCS =
+      MF->getMMI().getModule()->getModuleFlag("indirect_branch_cs_prefix");
+
   switch (MI->getOpcode()) {
   case TargetOpcode::DBG_VALUE:
     llvm_unreachable("Should be handled target independently");
@@ -2488,13 +2491,16 @@
     break;
   }
 
+  case X86::TAILJMPd64:
+    if (IndCS && MI->hasRegisterImplicitUseOperand(X86::R11))
+      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
+    LLVM_FALLTHROUGH;
   case X86::TAILJMPr:
   case X86::TAILJMPm:
   case X86::TAILJMPd:
   case X86::TAILJMPd_CC:
   case X86::TAILJMPr64:
   case X86::TAILJMPm64:
-  case X86::TAILJMPd64:
   case X86::TAILJMPd64_CC:
   case X86::TAILJMPr64_REX:
   case X86::TAILJMPm64_REX:
@@ -2668,6 +2674,10 @@
                                 .addImm(MI->getOperand(0).getImm())
                                 .addReg(X86::NoRegister));
     return;
+  case X86::CALL64pcrel32:
+    if (IndCS && MI->hasRegisterImplicitUseOperand(X86::R11))
+      EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
+    break;
   }
 
   MCInst TmpInst;
diff --git a/lib/Target/X86/X86ReturnThunks.cpp b/lib/Target/X86/X86ReturnThunks.cpp
index 4b20322..0107fab 100644
--- a/lib/Target/X86/X86ReturnThunks.cpp
+++ b/lib/Target/X86/X86ReturnThunks.cpp
@@ -34,6 +34,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/Debug.h"
 
@@ -73,9 +74,14 @@
       if (Term.getOpcode() == RetOpc)
         Rets.push_back(&Term);
 
+  bool IndCS =
+      MF.getMMI().getModule()->getModuleFlag("indirect_branch_cs_prefix");
+  const MCInstrDesc &CS = ST.getInstrInfo()->get(X86::CS_PREFIX);
   const MCInstrDesc &JMP = ST.getInstrInfo()->get(X86::TAILJMPd);
 
   for (MachineInstr *Ret : Rets) {
+    if (IndCS)
+      BuildMI(Ret->getParent(), Ret->getDebugLoc(), CS);
     BuildMI(Ret->getParent(), Ret->getDebugLoc(), JMP)
         .addExternalSymbol(ThunkName.data());
     Ret->eraseFromParent();
diff --git a/test/CodeGen/X86/attr-function-return.ll b/test/CodeGen/X86/attr-function-return.ll
index f40d971..091a8ea 100644
--- a/test/CodeGen/X86/attr-function-return.ll
+++ b/test/CodeGen/X86/attr-function-return.ll
@@ -6,6 +6,11 @@
 define void @x() fn_ret_thunk_extern {
 ; CHECK-LABEL: x:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    cs
 ; CHECK-NEXT:    jmp __x86_return_thunk
   ret void
 }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 4, !"indirect_branch_cs_prefix", i32 1}
diff --git a/test/CodeGen/X86/lvi-hardening-indirectbr.ll b/test/CodeGen/X86/lvi-hardening-indirectbr.ll
index 7bc0fe7..b6e6e61 100644
--- a/test/CodeGen/X86/lvi-hardening-indirectbr.ll
+++ b/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -22,18 +22,22 @@
 ; X64:       callq bar
 ; X64-DAG:   movl %[[x]], %edi
 ; X64-DAG:   movq %[[fp]], %r11
-; X64:       callq __llvm_lvi_thunk_r11
+; X64:       cs
+; X64-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64:       movl %[[x]], %edi
 ; X64:       callq bar
 ; X64-DAG:   movl %[[x]], %edi
 ; X64-DAG:   movq %[[fp]], %r11
-; X64:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64:       cs
+; X64-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 ; X64FAST-LABEL: icall_reg:
 ; X64FAST:       callq bar
-; X64FAST:       callq __llvm_lvi_thunk_r11
+; X64FAST:       cs
+; X64FAST-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64FAST:       callq bar
-; X64FAST:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST:       cs
+; X64FAST-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 
 @global_fp = external dso_local global ptr
@@ -50,16 +54,20 @@
 ; X64-LABEL: icall_global_fp:
 ; X64-DAG:   movl %edi, %[[x:[^ ]*]]
 ; X64-DAG:   movq global_fp(%rip), %r11
-; X64:       callq __llvm_lvi_thunk_r11
+; X64:       cs
+; X64-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64-DAG:   movl %[[x]], %edi
 ; X64-DAG:   movq global_fp(%rip), %r11
-; X64:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64:       cs
+; X64-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 ; X64FAST-LABEL: icall_global_fp:
 ; X64FAST:       movq global_fp(%rip), %r11
-; X64FAST:       callq __llvm_lvi_thunk_r11
+; X64FAST:       cs
+; X64FAST-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64FAST:       movq global_fp(%rip), %r11
-; X64FAST:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST:       cs
+; X64FAST-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 
 %struct.Foo = type { ptr }
@@ -79,14 +87,18 @@
 ; X64:       movq (%rdi), %[[vptr:[^ ]*]]
 ; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
 ; X64:       movq %[[fp]], %r11
-; X64:       callq __llvm_lvi_thunk_r11
+; X64:       cs
+; X64-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64-DAG:   movq %[[obj]], %rdi
 ; X64-DAG:   movq %[[fp]], %r11
-; X64:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64:       cs
+; X64-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 ; X64FAST-LABEL: vcall:
-; X64FAST:       callq __llvm_lvi_thunk_r11
-; X64FAST:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST:       cs
+; X64FAST-NEXT:  callq __llvm_lvi_thunk_r11
+; X64FAST:       cs
+; X64FAST-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 
 declare dso_local void @direct_callee()
@@ -113,14 +125,18 @@
 ; X64-LABEL: nonlazybind_caller:
 ; X64:       movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
 ; X64:       movq %[[REG]], %r11
-; X64:       callq __llvm_lvi_thunk_r11
+; X64:       cs
+; X64-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64:       movq %[[REG]], %r11
-; X64:       jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64:       cs
+; X64-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 ; X64FAST-LABEL: nonlazybind_caller:
 ; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
-; X64FAST:   callq __llvm_lvi_thunk_r11
+; X64FAST:   cs
+; X64FAST-NEXT:  callq __llvm_lvi_thunk_r11
 ; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
-; X64FAST:   jmp __llvm_lvi_thunk_r11 # TAILCALL
+; X64FAST:   cs
+; X64FAST-NEXT:  jmp __llvm_lvi_thunk_r11 # TAILCALL
 
 
 ; Check that a switch gets lowered using a jump table
@@ -278,3 +294,7 @@
 ; X64-NEXT:          jmpq     *%r11
 
 attributes #1 = { nonlazybind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 4, !"indirect_branch_cs_prefix", i32 1}