Reland "[ARM] push LR before __gnu_mcount_nc"

This relands r369147 with fixes to unit tests.

https://reviews.llvm.org/D65019

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369173 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td
index 4792af0..4da29ee 100644
--- a/include/llvm/IR/IntrinsicsARM.td
+++ b/include/llvm/IR/IntrinsicsARM.td
@@ -778,4 +778,9 @@
 def int_arm_neon_sdot : Neon_Dot_Intrinsic;
 
 
+// GNU EABI mcount hook: no operands, no results; may read and write memory.
+def int_arm_gnu_eabi_mcount : Intrinsic<[],
+                                    [],
+                                    [IntrReadMem, IntrWriteMem]>;
+
 } // end TargetPrefix
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index bd4ca38..73ebe09 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1916,6 +1916,37 @@
 
     case ARM::CMP_SWAP_64:
       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
+    // Expand (t)BL_PUSHLR into a push of LR followed by a BL to the callee.
+    case ARM::tBL_PUSHLR:
+    case ARM::BL_PUSHLR: {
+      const bool Thumb = Opcode == ARM::tBL_PUSHLR;
+      Register Reg = MI.getOperand(0).getReg();
+      assert(Reg == ARM::LR && "expect LR register!");
+      MachineInstrBuilder MIB;
+      if (Thumb) {
+        // Emit "push {lr}" ahead of the call.
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
+            .add(predOps(ARMCC::AL))
+            .addReg(Reg);
+
+        // Emit "bl __gnu_mcount_nc"; operands 1.. of MI are copied in below.
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
+      } else {
+        // Emit "stmdb sp!, {lr}" (SP is both read and redefined) before the call.
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
+            .addReg(ARM::SP, RegState::Define)
+            .addReg(ARM::SP)
+            .add(predOps(ARMCC::AL))
+            .addReg(Reg);
+
+        // Emit "bl __gnu_mcount_nc"; operands 1.. of MI are copied in below.
+        MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
+      }
+      MIB.cloneMemRefs(MI);
+      for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
+      MI.eraseFromParent();
+      return true;
+    }
   }
 }
 
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9bf4458..63b8f2a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1017,6 +1017,7 @@
   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
   setOperationAction(ISD::SRL,       MVT::i64, Custom);
   setOperationAction(ISD::SRA,       MVT::i64, Custom);
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
 
   // MVE lowers 64 bit shifts to lsll and lsrl
@@ -3544,6 +3545,48 @@
                      Op.getOperand(0));
 }
 
+SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
+    SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
+  unsigned IntNo =
+      cast<ConstantSDNode>(
+          Op.getOperand(Op.getOperand(0).getValueType() == MVT::Other))
+          ->getZExtValue();  // ID is operand 1 when operand 0 is the chain
+  switch (IntNo) {
+    default:
+      return SDValue();  // Empty SDValue: no custom lowering for the rest.
+    case Intrinsic::arm_gnu_eabi_mcount: {
+      MachineFunction &MF = DAG.getMachineFunction();
+      EVT PtrVT = getPointerTy(DAG.getDataLayout());
+      SDLoc dl(Op);
+      SDValue Chain = Op.getOperand(0);
+      // Call "\01__gnu_mcount_nc" with the C call-preserved register mask.
+      const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
+      const uint32_t *Mask =
+          ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
+      assert(Mask && "Missing call preserved mask for calling convention");
+      // Mark LR as a function live-in and read it as the return address.
+      unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
+      SDValue ReturnAddress =
+          DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
+      std::vector<EVT> ResultTys = {MVT::Other, MVT::Glue};
+      SDValue Callee =
+          DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
+      SDValue RegisterMask = DAG.getRegisterMask(Mask);
+      if (Subtarget->isThumb())  // tBL_PUSHLR also takes a predicate
+        return SDValue(
+            DAG.getMachineNode(
+                ARM::tBL_PUSHLR, dl, ResultTys,
+                {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
+                 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
+            0);
+      return SDValue(
+          DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
+                             {ReturnAddress, Callee, RegisterMask, Chain}),
+          0);
+    }
+  }
+}
+
 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                           const ARMSubtarget *Subtarget) const {
@@ -8835,6 +8878,7 @@
   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
+  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
   case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 15dcd9d..d84a235 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -667,6 +667,8 @@
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
+                                    const ARMSubtarget *Subtarget) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7783bed..ac1be46 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -2370,6 +2370,12 @@
   def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins arm_bl_target:$func),
                                8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
                       Requires<[IsARM]>, Sched<[WriteBr]>;
+
+  // Pseudo for a call that pushes LR ($ra) onto the stack before the BL.
+  def BL_PUSHLR : ARMPseudoInst<(outs), (ins GPRlr:$ra, arm_bl_target:$func),
+                  4, IIC_Br,
+                  []>,
+             Requires<[IsARM]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1 in {
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index ef2814b..18bcbda 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -565,6 +565,13 @@
                   4, IIC_Br,
                   [(ARMcall_nolink tGPR:$func)]>,
             Requires<[IsThumb, IsThumb1Only]>, Sched<[WriteBr]>;
+
+  // Also used for Thumb2
+  // Pseudo for a call that pushes LR ($ra) onto the stack before the BL.
+  def tBL_PUSHLR : tPseudoInst<(outs), (ins GPRlr:$ra, pred:$p, thumb_bl_target:$func),
+                  4, IIC_Br,
+                  []>,
+             Requires<[IsThumb]>, Sched<[WriteBr]>;
 }
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 4aa40ee..57e2ff0 100644
--- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -24,7 +24,7 @@
 
   if (Func == "mcount" ||
       Func == ".mcount" ||
-      Func == "\01__gnu_mcount_nc" ||
+      Func == "llvm.arm.gnu.eabi.mcount" ||
       Func == "\01_mcount" ||
       Func == "\01mcount" ||
       Func == "__mcount" ||
diff --git a/test/CodeGen/ARM/gnu_mcount_nc.ll b/test/CodeGen/ARM/gnu_mcount_nc.ll
new file mode 100644
index 0000000..c36991f
--- /dev/null
+++ b/test/CodeGen/ARM/gnu_mcount_nc.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=armv7a-linux-gnueabihf -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=armv7a-linux-gnueabihf -verify-machineinstrs -fast-isel %s -o - | FileCheck %s --check-prefix=CHECK-ARM-FAST-ISEL
+; RUN: llc -mtriple=armv7a-linux-gnueabihf -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - | FileCheck %s --check-prefix=CHECK-ARM-GLOBAL-ISEL
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf -verify-machineinstrs -fast-isel %s -o - | FileCheck %s --check-prefix=CHECK-THUMB-FAST-ISEL
+; RUN: llc -mtriple=thumbv7a-linux-gnueabihf -verify-machineinstrs -global-isel -global-isel-abort=2 %s -o - | FileCheck %s --check-prefix=CHECK-THUMB-GLOBAL-ISEL
+
+define dso_local void @callee() #0 {
+; CHECK-ARM:                    stmdb   sp!, {lr}
+; CHECK-ARM-NEXT:               bl      __gnu_mcount_nc
+; CHECK-ARM-FAST-ISEL:          stmdb   sp!, {lr}
+; CHECK-ARM-FAST-ISEL-NEXT:     bl      __gnu_mcount_nc
+; CHECK-ARM-GLOBAL-ISEL:        stmdb   sp!, {lr}
+; CHECK-ARM-GLOBAL-ISEL-NEXT:   bl      __gnu_mcount_nc
+; CHECK-THUMB:                  push    {lr}
+; CHECK-THUMB-NEXT:             bl      __gnu_mcount_nc
+; CHECK-THUMB-FAST-ISEL:        push    {lr}
+; CHECK-THUMB-FAST-ISEL-NEXT:   bl      __gnu_mcount_nc
+; CHECK-THUMB-GLOBAL-ISEL:      push    {lr}
+; CHECK-THUMB-GLOBAL-ISEL-NEXT: bl      __gnu_mcount_nc
+  ret void
+}
+
+define dso_local void @caller() #0 {
+; CHECK-ARM:                    stmdb   sp!, {lr}
+; CHECK-ARM-NEXT:               bl      __gnu_mcount_nc
+; CHECK-ARM-FAST-ISEL:          stmdb   sp!, {lr}
+; CHECK-ARM-FAST-ISEL-NEXT:     bl      __gnu_mcount_nc
+; CHECK-ARM-GLOBAL-ISEL:        stmdb   sp!, {lr}
+; CHECK-ARM-GLOBAL-ISEL-NEXT:   bl      __gnu_mcount_nc
+; CHECK-THUMB:                  push    {lr}
+; CHECK-THUMB-NEXT:             bl      __gnu_mcount_nc
+; CHECK-THUMB-FAST-ISEL:        push    {lr}
+; CHECK-THUMB-FAST-ISEL-NEXT:   bl      __gnu_mcount_nc
+; CHECK-THUMB-GLOBAL-ISEL:      push    {lr}
+; CHECK-THUMB-GLOBAL-ISEL-NEXT: bl      __gnu_mcount_nc
+  call void @callee()
+  ret void
+}
+
+attributes #0 = { nofree nounwind "instrument-function-entry-inlined"="llvm.arm.gnu.eabi.mcount" }
diff --git a/test/Transforms/EntryExitInstrumenter/mcount.ll b/test/Transforms/EntryExitInstrumenter/mcount.ll
index 3048360..56c8b53 100644
--- a/test/Transforms/EntryExitInstrumenter/mcount.ll
+++ b/test/Transforms/EntryExitInstrumenter/mcount.ll
@@ -54,7 +54,7 @@
 
 define void @f2() #2 { entry: ret void }
 ; CHECK-LABEL: define void @f2
-; CHECK: call void @"\01__gnu_mcount_nc"
+; CHECK: call void @llvm.arm.gnu.eabi.mcount
 
 define void @f3() #3 { entry: ret void }
 ; CHECK-LABEL: define void @f3
@@ -105,7 +105,7 @@
 
 attributes #0 = { "instrument-function-entry-inlined"="mcount" "instrument-function-entry"="__cyg_profile_func_enter" "instrument-function-exit"="__cyg_profile_func_exit" }
 attributes #1 = { "instrument-function-entry-inlined"=".mcount" }
-attributes #2 = { "instrument-function-entry-inlined"="\01__gnu_mcount_nc" }
+attributes #2 = { "instrument-function-entry-inlined"="llvm.arm.gnu.eabi.mcount" }
 attributes #3 = { "instrument-function-entry-inlined"="\01_mcount" }
 attributes #4 = { "instrument-function-entry-inlined"="\01mcount" }
 attributes #5 = { "instrument-function-entry-inlined"="__mcount" }