[AVR] Expand 8/16-bit multiplication to libcalls on MCUs that don't have hardware MUL

This change modifies the LLVM ISel lowering settings so that
8-bit/16-bit multiplication is expanded to calls into the compiler
runtime library if the MCU being targeted does not support
multiplication in hardware.

Before this change, MUL instructions were generated even for CPUs without
a hardware multiplier, such as the ATtiny85, triggering a CPU reset due to
an illegal instruction at runtime.

First raised in https://github.com/avr-rust/rust/issues/124.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351523 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 57fc978..5db7577 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -26,19 +26,21 @@
 
 #include "AVR.h"
 #include "AVRMachineFunctionInfo.h"
+#include "AVRSubtarget.h"
 #include "AVRTargetMachine.h"
 #include "MCTargetDesc/AVRMCTargetDesc.h"
 
 namespace llvm {
 
-AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
-    : TargetLowering(tm) {
+AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
+                                     const AVRSubtarget &STI)
+    : TargetLowering(TM), Subtarget(STI) {
   // Set up the register classes.
   addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
   addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
 
   // Compute derived properties from the register classes.
-  computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo());
+  computeRegisterProperties(Subtarget.getRegisterInfo());
 
   setBooleanContents(ZeroOrOneBooleanContent);
   setBooleanVectorContents(ZeroOrOneBooleanContent);
@@ -163,6 +165,13 @@
   setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
   setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
 
+  // Expand multiplications to libcalls when there is
+  // no hardware MUL.
+  if (!Subtarget.supportsMultiplication()) {
+    setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+    setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+  }
+
   for (MVT VT : MVT::integer_valuetypes()) {
     setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::MULHU, VT, Expand);
@@ -1271,7 +1280,7 @@
 
   // Add a register mask operand representing the call-preserved registers.
   const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   const uint32_t *Mask =
       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
   assert(Mask && "Missing call preserved mask for calling convention");
@@ -1434,7 +1443,7 @@
   MachineFunction *F = BB->getParent();
   MachineRegisterInfo &RI = F->getRegInfo();
   const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
 
   switch (MI.getOpcode()) {
@@ -1575,7 +1584,7 @@
 MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
                                                 MachineBasicBlock *BB) const {
   const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
-  const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
   MachineBasicBlock::iterator I(MI);
   ++I; // in any case insert *after* the mul instruction
   if (isCopyMulResult(I))
@@ -1838,9 +1847,6 @@
 AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                 StringRef Constraint,
                                                 MVT VT) const {
-  auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine())
-                 .getSubtargetImpl();
-
   // We only support i8 and i16.
   //
   //:FIXME: remove this assert for now since it gets sometimes executed
@@ -1884,8 +1890,8 @@
     }
   }
 
-  return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(),
-                                                      Constraint, VT);
+  return TargetLowering::getRegForInlineAsmConstraint(
+      Subtarget.getRegisterInfo(), Constraint, VT);
 }
 
 void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index c90c65c..7d77dd8 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -64,12 +64,14 @@
 
 } // end of namespace AVRISD
 
+class AVRSubtarget;
 class AVRTargetMachine;
 
 /// Performs target lowering for the AVR.
 class AVRTargetLowering : public TargetLowering {
 public:
-  explicit AVRTargetLowering(AVRTargetMachine &TM);
+  explicit AVRTargetLowering(const AVRTargetMachine &TM,
+                             const AVRSubtarget &STI);
 
 public:
   MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override {
@@ -164,6 +166,10 @@
                           const SDLoc &dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;
 
+protected:
+
+  const AVRSubtarget &Subtarget;
+
 private:
   MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp
index 556d69e..c7c5662 100644
--- a/lib/Target/AVR/AVRSubtarget.cpp
+++ b/lib/Target/AVR/AVRSubtarget.cpp
@@ -29,9 +29,9 @@
 namespace llvm {
 
 AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
-                           const std::string &FS, AVRTargetMachine &TM)
+                           const std::string &FS, const AVRTargetMachine &TM)
     : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(),
-      TLInfo(TM), TSInfo(),
+      TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(),
 
       // Subtarget features
       m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
@@ -44,4 +44,12 @@
   ParseSubtargetFeatures(CPU, FS);
 }
 
+AVRSubtarget &
+AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+                                              const TargetMachine &TM) {
+  // Parse the features up front; the constructor hands *this to TLInfo.
+  ParseSubtargetFeatures(CPU, FS);
+  return *this;
+}
+
 } // end of namespace llvm
diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h
index fa26738..ba036d5 100644
--- a/lib/Target/AVR/AVRSubtarget.h
+++ b/lib/Target/AVR/AVRSubtarget.h
@@ -37,7 +37,7 @@
   //! \param FS  The feature string.
   //! \param TM  The target machine.
   AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               AVRTargetMachine &TM);
+               const AVRTargetMachine &TM);
 
   const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
   const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
@@ -49,6 +49,9 @@
   /// \note Definition of function is auto generated by `tblgen`.
   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 
+  AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+                                                const TargetMachine &TM);
+
   // Subtarget feature getters.
   // See AVR.td for details.
   bool hasSRAM() const { return m_hasSRAM; }
diff --git a/test/CodeGen/AVR/mul.ll b/test/CodeGen/AVR/hardware-mul.ll
similarity index 90%
rename from test/CodeGen/AVR/mul.ll
rename to test/CodeGen/AVR/hardware-mul.ll
index 2f16934..6506978 100644
--- a/test/CodeGen/AVR/mul.ll
+++ b/test/CodeGen/AVR/hardware-mul.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -mattr=mul,movw < %s -march=avr | FileCheck %s
 
+; Tests lowering of multiplication to hardware instructions.
+
 define i8 @mult8(i8 %a, i8 %b) {
 ; CHECK-LABEL: mult8:
 ; CHECK: muls r22, r24
diff --git a/test/CodeGen/AVR/smul-with-overflow.ll b/test/CodeGen/AVR/smul-with-overflow.ll
index 745e930..9eb2c74 100644
--- a/test/CodeGen/AVR/smul-with-overflow.ll
+++ b/test/CodeGen/AVR/smul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=avr | FileCheck %s
+; RUN: llc -mattr=avr6 < %s -march=avr | FileCheck %s
 
 define i1 @signed_multiplication_did_overflow(i8, i8) unnamed_addr {
 ; CHECK-LABEL: signed_multiplication_did_overflow:
diff --git a/test/CodeGen/AVR/software-mul.ll b/test/CodeGen/AVR/software-mul.ll
new file mode 100644
index 0000000..9a4d281
--- /dev/null
+++ b/test/CodeGen/AVR/software-mul.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mattr=avr6,-mul < %s -march=avr | FileCheck %s
+; RUN: llc -mcpu=attiny85 < %s -march=avr | FileCheck %s
+; RUN: llc -mcpu=ata5272 < %s -march=avr | FileCheck %s
+; RUN: llc -mcpu=attiny861a < %s -march=avr | FileCheck %s
+; RUN: llc -mcpu=at90usb82 < %s -march=avr | FileCheck %s
+
+; Tests lowering of multiplication to compiler support routines.
+
+; CHECK-LABEL: mul8:
+define i8 @mul8(i8 %a, i8 %b) {
+; CHECK: mov  r25, r24
+; CHECK: mov  r24, r22
+; CHECK: mov  r22, r25
+; CHECK: call __mulqi3
+  %mul = mul i8 %b, %a
+  ret i8 %mul
+}
+
+; CHECK-LABEL: mul16:
+define i16 @mul16(i16 %a, i16 %b) {
+; CHECK: movw  r18, r24
+; CHECK: movw  r24, r22
+; CHECK: movw  r22, r18
+; CHECK: call  __mulhi3
+  %mul = mul nsw i16 %b, %a
+  ret i16 %mul
+}
+
diff --git a/test/CodeGen/AVR/umul-with-overflow.ll b/test/CodeGen/AVR/umul-with-overflow.ll
index aa8b10a..c645755 100644
--- a/test/CodeGen/AVR/umul-with-overflow.ll
+++ b/test/CodeGen/AVR/umul-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=avr | FileCheck %s
+; RUN: llc -mattr=avr6 < %s -march=avr | FileCheck %s
 
 define i1 @unsigned_multiplication_did_overflow(i8, i8) unnamed_addr {
 ; CHECK-LABEL: unsigned_multiplication_did_overflow: