[CodeGen] Fix incorrect usage of MCPhysReg for diff list elements

The lists contain differences between register numbers, not the register
numbers themselves. Since a difference can also be negative, this also
changes its type to signed.

Changing the type to signed exposed a "bug". For AMDGPU, which has many
registers, the first element of a sequence could be as big as ~45k.
The value does not fit into int16_t, but fits into uint16_t. The bug
didn't show up because of unsigned wrapping and truncation of the Val
field in the advance() method.

To fix the issue, I changed the way regunit difflists are encoded. The
4-bit 'scale' field of MCRegisterDesc::RegUnit was replaced by 12-bit
number of the first regunit, and the first element of each of the lists
was removed. The higher 20 bits of RegUnit field contain the initial
offset into DiffLists array.
AMDGPU has 1'409 regunits (2^12 = 4'096), and the biggest offset is
80'041 (2^20 = 1'048'576). That is, there is enough room.

Changing the encoding method also resulted in a smaller array size, the
numbers are below (I omitted targets with less than 100 elements).

```
AMDGPU   | 80052 | 78741 |  -1,6%
RISCV    |  6498 |  6297 |  -3,1%
ARM      |  4181 |  3966 |  -5,1%
AArch64  |  2770 |  2592 |  -6,4%
PPC      |  1578 |  1441 |  -8,7%
Hexagon  |   994 |   740 | -25,6%
R600     |   508 |   398 | -21,7%
VE       |   471 |   459 |  -2,5%
Sparc    |   381 |   363 |  -4,7%
X86      |   326 |   208 | -36,2%
Mips     |   253 |   200 | -20,9%
SystemZ  |   186 |   162 | -12,9%
```

Reviewed By: foad, arsenm

Differential Revision: https://reviews.llvm.org/D151036

GitOrigin-RevId: 7a258706e3d194ea9cec364c8e535446646fbcf1
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index cafa0ba..8e48903 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -111,8 +111,8 @@
   // sub-register in SubRegs.
   uint32_t SubRegIndices;
 
-  // RegUnits - Points to the list of register units. The low 4 bits holds the
-  // Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
+  // Points to the list of register units. The low bits hold the first regunit
+  // number, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
   uint32_t RegUnits;
 
   /// Index into list with lane mask sequences. The sequence contains a lanemask
@@ -161,7 +161,7 @@
   unsigned NumClasses;                        // Number of entries in the array
   unsigned NumRegUnits;                       // Number of regunits.
   const MCPhysReg (*RegUnitRoots)[2];         // Pointer to regunit root table.
-  const MCPhysReg *DiffLists;                 // Pointer to the difflists array
+  const int16_t *DiffLists;                   // Pointer to the difflists array
   const LaneBitmask *RegUnitMaskSequences;    // Pointer to lane mask sequences
                                               // for register units.
   const char *RegStrings;                     // Pointer to the string table.
@@ -194,31 +194,19 @@
   /// Don't use this class directly, use one of the specialized sub-classes
   /// defined below.
   class DiffListIterator {
-    uint16_t Val = 0;
-    const MCPhysReg *List = nullptr;
+    unsigned Val = 0;
+    const int16_t *List = nullptr;
 
   protected:
     /// Create an invalid iterator. Call init() to point to something useful.
     DiffListIterator() = default;
 
-    /// init - Point the iterator to InitVal, decoding subsequent values from
-    /// DiffList. The iterator will initially point to InitVal, sub-classes are
-    /// responsible for skipping the seed value if it is not part of the list.
-    void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
+    /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+    void init(unsigned InitVal, const int16_t *DiffList) {
       Val = InitVal;
       List = DiffList;
     }
 
-    /// advance - Move to the next list position, return the applied
-    /// differential. This function does not detect the end of the list, that
-    /// is the caller's responsibility (by checking for a 0 return value).
-    MCRegister advance() {
-      assert(isValid() && "Cannot move off the end of the list.");
-      MCPhysReg D = *List++;
-      Val += D;
-      return D;
-    }
-
   public:
     /// isValid - returns true if this iterator is not yet at the end.
     bool isValid() const { return List; }
@@ -228,8 +216,11 @@
 
     /// Pre-increment to move to the next position.
     void operator++() {
+      assert(isValid() && "Cannot move off the end of the list.");
+      int16_t D = *List++;
+      Val += D;
       // The end of the list is encoded as a 0 differential.
-      if (!advance())
+      if (!D)
         List = nullptr;
     }
 
@@ -248,9 +239,9 @@
   protected:
     mc_difflist_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {}
 
-    // Allow conversion between instantiations where valid.
-    mc_difflist_iterator(MCRegister Reg, const MCPhysReg *DiffList) {
-      Iter.init(Reg, DiffList);
+    /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+    void init(unsigned InitVal, const int16_t *DiffList) {
+      Iter.init(InitVal, DiffList);
       Val = *Iter;
     }
 
@@ -287,8 +278,11 @@
     mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter)
         : mc_difflist_iterator(Iter) {}
     mc_subreg_iterator() = default;
-    mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
-        : mc_difflist_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {}
+
+    mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
+      assert(MCRegister::isPhysicalRegister(Reg.id()));
+      init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+    }
   };
 
   /// Forward iterator over all super-registers.
@@ -299,9 +293,11 @@
     mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter)
         : mc_difflist_iterator(Iter) {}
     mc_superreg_iterator() = default;
-    mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
-        : mc_difflist_iterator(Reg,
-                               MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {}
+
+    mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
+      assert(MCRegister::isPhysicalRegister(Reg.id()));
+      init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+    }
   };
 
   /// Return an iterator range over all sub-registers of \p Reg, excluding \p
@@ -351,16 +347,11 @@
   /// Initialize MCRegisterInfo, called by TableGen
   /// auto-generated routines. *DO NOT USE*.
   void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
-                          unsigned PC,
-                          const MCRegisterClass *C, unsigned NC,
-                          const MCPhysReg (*RURoots)[2],
-                          unsigned NRU,
-                          const MCPhysReg *DL,
-                          const LaneBitmask *RUMS,
-                          const char *Strings,
-                          const char *ClassStrings,
-                          const uint16_t *SubIndices,
-                          unsigned NumIndices,
+                          unsigned PC, const MCRegisterClass *C, unsigned NC,
+                          const MCPhysReg (*RURoots)[2], unsigned NRU,
+                          const int16_t *DL, const LaneBitmask *RUMS,
+                          const char *Strings, const char *ClassStrings,
+                          const uint16_t *SubIndices, unsigned NumIndices,
                           const SubRegCoveredBits *SubIdxRanges,
                           const uint16_t *RET) {
     Desc = D;
@@ -598,7 +589,8 @@
 public:
   MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
                    bool IncludeSelf = false) {
-    init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+    assert(MCRegister::isPhysicalRegister(Reg.id()));
+    init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
     // Initially, the iterator points to Reg itself.
     if (!IncludeSelf)
       ++*this;
@@ -647,7 +639,8 @@
 
   MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
                      bool IncludeSelf = false) {
-    init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+    assert(MCRegister::isPhysicalRegister(Reg.id()));
+    init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
     // Initially, the iterator points to Reg itself.
     if (!IncludeSelf)
       ++*this;
@@ -675,6 +668,9 @@
 // MCRegUnitIterator enumerates a list of register units for Reg. The list is
 // in ascending numerical order.
 class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
+  // The value must be kept in sync with RegisterInfoEmitter.cpp.
+  static constexpr unsigned RegUnitBits = 12;
+
 public:
   /// MCRegUnitIterator - Create an iterator that traverses the register units
   /// in Reg.
@@ -685,18 +681,9 @@
     assert(MCRegister::isPhysicalRegister(Reg.id()));
     // Decode the RegUnits MCRegisterDesc field.
     unsigned RU = MCRI->get(Reg).RegUnits;
-    unsigned Scale = RU & 15;
-    unsigned Offset = RU >> 4;
-
-    // Initialize the iterator to Reg * Scale, and the List pointer to
-    // DiffLists + Offset.
-    init(Reg * Scale, MCRI->DiffLists + Offset);
-
-    // That may not be a valid unit, we need to advance by one to get the real
-    // unit number. The first differential can be 0 which would normally
-    // terminate the list, but since we know every register has at least one
-    // unit, we can allow a 0 differential here.
-    advance();
+    unsigned FirstRU = RU & ((1u << RegUnitBits) - 1);
+    unsigned Offset = RU >> RegUnitBits;
+    init(FirstRU, MCRI->DiffLists + Offset);
   }
 
   MCRegUnitIterator &operator++() {
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 1058f3b..f9115e4 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -392,7 +392,7 @@
         break;
       }
       I->Size = std::max(I->Size, II->Size);
-      if (TRI->isSuperRegister(I->Reg, II->Reg))
+      if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
         I->Reg = II->Reg;
       II->Reg = 0; // mark for deletion.
     }
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index f35dd36..1f433c0 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -635,17 +635,16 @@
 // The initial value depends on the specific list. The list is terminated by a
 // 0 differential which means we can't encode repeated elements.
 
-typedef SmallVector<uint16_t, 4> DiffVec;
+typedef SmallVector<int16_t, 4> DiffVec;
 typedef SmallVector<LaneBitmask, 4> MaskVec;
 
-// Differentially encode a sequence of numbers into V. The starting value and
-// terminating 0 are not added to V, so it will have the same size as List.
-static
-DiffVec &diffEncode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) {
+// Fills V with differentials between every two consecutive elements of List.
+static DiffVec &diffEncode(DiffVec &V, SparseBitVector<> List) {
   assert(V.empty() && "Clear DiffVec before diffEncode.");
-  uint16_t Val = uint16_t(InitVal);
-
-  for (uint16_t Cur : List) {
+  SparseBitVector<>::iterator I = List.begin(), E = List.end();
+  unsigned Val = *I;
+  while (++I != E) {
+    unsigned Cur = *I;
     V.push_back(Cur - Val);
     Val = Cur;
   }
@@ -656,18 +655,16 @@
 static
 DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) {
   assert(V.empty() && "Clear DiffVec before diffEncode.");
-  uint16_t Val = uint16_t(InitVal);
+  unsigned Val = InitVal;
   for (Iter I = Begin; I != End; ++I) {
-    uint16_t Cur = (*I)->EnumValue;
+    unsigned Cur = (*I)->EnumValue;
     V.push_back(Cur - Val);
     Val = Cur;
   }
   return V;
 }
 
-static void printDiff16(raw_ostream &OS, uint16_t Val) {
-  OS << Val;
-}
+static void printDiff16(raw_ostream &OS, int16_t Val) { OS << Val; }
 
 static void printMask(raw_ostream &OS, LaneBitmask Val) {
   OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
@@ -891,7 +888,6 @@
   SmallVector<DiffVec, 4> SubRegLists(Regs.size());
   SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
   SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
-  SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());
 
   // List of lane masks accompanying register unit sequences.
   SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
@@ -929,31 +925,8 @@
                SuperRegList.end());
     DiffSeqs.add(SuperRegLists[i]);
 
-    // Differentially encode the register unit list, seeded by register number.
-    // First compute a scale factor that allows more diff-lists to be reused:
-    //
-    //   D0 -> (S0, S1)
-    //   D1 -> (S2, S3)
-    //
-    // A scale factor of 2 allows D0 and D1 to share a diff-list. The initial
-    // value for the differential decoder is the register number multiplied by
-    // the scale.
-    //
-    // Check the neighboring registers for arithmetic progressions.
-    unsigned ScaleA = ~0u, ScaleB = ~0u;
-    SparseBitVector<> RUs = Reg.getNativeRegUnits();
-    if (I != Regs.begin() &&
-        std::prev(I)->getNativeRegUnits().count() == RUs.count())
-      ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin();
-    if (std::next(I) != Regs.end() &&
-        std::next(I)->getNativeRegUnits().count() == RUs.count())
-      ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin();
-    unsigned Scale = std::min(ScaleB, ScaleA);
-    // Default the scale to 0 if it can't be encoded in 4 bits.
-    if (Scale >= 16)
-      Scale = 0;
-    RegUnitInitScale[i] = Scale;
-    DiffSeqs.add(diffEncode(RegUnitLists[i], Scale * Reg.EnumValue, RUs));
+    const SparseBitVector<> &RUs = Reg.getNativeRegUnits();
+    DiffSeqs.add(diffEncode(RegUnitLists[i], RUs));
 
     const auto &RUMasks = Reg.getRegUnitLaneMasks();
     MaskVec &LaneMaskVec = RegUnitLaneMasks[i];
@@ -978,7 +951,7 @@
   const std::string &TargetName = std::string(Target.getName());
 
   // Emit the shared table of differential lists.
-  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
+  OS << "extern const int16_t " << TargetName << "RegDiffLists[] = {\n";
   DiffSeqs.emit(OS, printDiff16);
   OS << "};\n\n";
 
@@ -1014,10 +987,16 @@
   // Emit the register descriptors now.
   i = 0;
   for (const auto &Reg : Regs) {
+    unsigned FirstRU = Reg.getNativeRegUnits().find_first();
+    unsigned Offset = DiffSeqs.get(RegUnitLists[i]);
+    // The value must be kept in sync with MCRegisterInfo.h.
+    constexpr unsigned RegUnitBits = 12;
+    assert(isUInt<RegUnitBits>(FirstRU) && "Too many regunits");
+    assert(isUInt<32 - RegUnitBits>(Offset) && "Offset is too big");
     OS << "  { " << RegStrings.get(std::string(Reg.getName())) << ", "
        << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
        << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
-       << (DiffSeqs.get(RegUnitLists[i]) * 16 + RegUnitInitScale[i]) << ", "
+       << (Offset << RegUnitBits | FirstRU) << ", "
        << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
     ++i;
   }
@@ -1651,7 +1630,7 @@
 
   // Emit the constructor of the class...
   OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
-  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
+  OS << "extern const int16_t " << TargetName << "RegDiffLists[];\n";
   OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[];\n";
   OS << "extern const char " << TargetName << "RegStrings[];\n";
   OS << "extern const char " << TargetName << "RegClassStrings[];\n";