[MCA] Fix wrong definition of ResourceUnitMask in DefaultResourceStrategy.

Field ResourceUnitMask was incorrectly defined as a 'const unsigned' mask. It
should have been a 64 bit quantity instead. That means, ResourceUnitMask was
always implicitly truncated to a 32 bit quantity.
This issue has been found by inspection. Surprisingly, that bug was latent, and
it never negatively affected any existing upstream targets.

This patch fixes  the wrong definition of ResourceUnitMask, and adds a bunch of
extra debug prints to help debugging potential issues related to invalid
processor resource masks.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350820 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/MCA/HardwareUnits/ResourceManager.h b/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 9114f7d..549a46c 100644
--- a/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -72,7 +72,7 @@
   ///
   /// There is one bit set for every available resource unit.
   /// It defaults to the value of field ResourceSizeMask in ResourceState.
-  const unsigned ResourceUnitMask;
+  const uint64_t ResourceUnitMask;
 
   /// A simple round-robin selector for processor resource units.
   /// Each bit of this mask identifies a sub resource within a group.
@@ -335,13 +335,13 @@
   // Used to quickly identify groups that own a particular resource unit.
   std::vector<uint64_t> Resource2Groups;
 
+  // A table to map processor resource IDs to processor resource masks.
+  SmallVector<uint64_t, 8> ProcResID2Mask;
+
   // Keeps track of which resources are busy, and how many cycles are left
   // before those become usable again.
   SmallDenseMap<ResourceRef, unsigned> BusyResources;
 
-  // A table to map processor resource IDs to processor resource masks.
-  SmallVector<uint64_t, 8> ProcResID2Mask;
-
   // Returns the actual resource unit that will be used.
   ResourceRef selectPipe(uint64_t ResourceID);
 
diff --git a/include/llvm/MCA/Instruction.h b/include/llvm/MCA/Instruction.h
index 38c7ff4..b91610c 100644
--- a/include/llvm/MCA/Instruction.h
+++ b/include/llvm/MCA/Instruction.h
@@ -138,8 +138,8 @@
 public:
   WriteState(const WriteDescriptor &Desc, unsigned RegID,
              bool clearsSuperRegs = false, bool writesZero = false)
-      : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
-        PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
+      : WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), PRFID(0),
+        ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
         IsEliminated(false), DependentWrite(nullptr), PartialWrite(nullptr),
         DependentWriteCyclesLeft(0) {}
 
@@ -155,7 +155,9 @@
   void addUser(ReadState *Use, int ReadAdvance);
   void addUser(WriteState *Use);
 
-  unsigned getDependentWriteCyclesLeft() const { return DependentWriteCyclesLeft; }
+  unsigned getDependentWriteCyclesLeft() const {
+    return DependentWriteCyclesLeft;
+  }
 
   unsigned getNumUsers() const {
     unsigned NumUsers = Users.size();
@@ -348,11 +350,6 @@
   InstrDesc() = default;
   InstrDesc(const InstrDesc &Other) = delete;
   InstrDesc &operator=(const InstrDesc &Other) = delete;
-
-#ifndef NDEBUG
-  // Original instruction name for debugging purposes.
-  StringRef Name;
-#endif
 };
 
 /// Base class for instructions consumed by the simulation pipeline.
@@ -551,4 +548,4 @@
 } // namespace mca
 } // namespace llvm
 
-#endif  // LLVM_MCA_INSTRUCTION_H
+#endif // LLVM_MCA_INSTRUCTION_H
diff --git a/include/llvm/MCA/Stages/ExecuteStage.h b/include/llvm/MCA/Stages/ExecuteStage.h
index 4f40c5c..8cb287e 100644
--- a/include/llvm/MCA/Stages/ExecuteStage.h
+++ b/include/llvm/MCA/Stages/ExecuteStage.h
@@ -65,7 +65,7 @@
 
   void notifyInstructionIssued(
       const InstRef &IR,
-      ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
+      MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const;
   void notifyInstructionExecuted(const InstRef &IR) const;
   void notifyInstructionReady(const InstRef &IR) const;
   void notifyResourceAvailable(const ResourceRef &RR) const;
diff --git a/include/llvm/MCA/Stages/InstructionTables.h b/include/llvm/MCA/Stages/InstructionTables.h
index 50f84fc..34e338f 100644
--- a/include/llvm/MCA/Stages/InstructionTables.h
+++ b/include/llvm/MCA/Stages/InstructionTables.h
@@ -32,7 +32,8 @@
   SmallVector<uint64_t, 8> Masks;
 
 public:
-  InstructionTables(const MCSchedModel &Model) : Stage(), SM(Model) {
+  InstructionTables(const MCSchedModel &Model)
+      : Stage(), SM(Model), Masks(Model.getNumProcResourceKinds()) {
     computeProcResourceMasks(Model, Masks);
   }
 
diff --git a/include/llvm/MCA/Support.h b/include/llvm/MCA/Support.h
index 5c5a354..7b0c5bf 100644
--- a/include/llvm/MCA/Support.h
+++ b/include/llvm/MCA/Support.h
@@ -104,7 +104,7 @@
 /// Resource masks are used by the ResourceManager to solve set membership
 /// problems with simple bit manipulation operations.
 void computeProcResourceMasks(const MCSchedModel &SM,
-                              SmallVectorImpl<uint64_t> &Masks);
+                              MutableArrayRef<uint64_t> Masks);
 
 /// Compute the reciprocal block throughput from a set of processor resource
 /// cycles. The reciprocal block throughput is computed as the MAX between:
diff --git a/lib/MCA/HardwareUnits/ResourceManager.cpp b/lib/MCA/HardwareUnits/ResourceManager.cpp
index d05ec00..2039b58 100644
--- a/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -118,7 +118,8 @@
 ResourceManager::ResourceManager(const MCSchedModel &SM)
     : Resources(SM.getNumProcResourceKinds()),
       Strategies(SM.getNumProcResourceKinds()),
-      Resource2Groups(SM.getNumProcResourceKinds(), 0) {
+      Resource2Groups(SM.getNumProcResourceKinds(), 0),
+      ProcResID2Mask(SM.getNumProcResourceKinds()) {
   computeProcResourceMasks(SM, ProcResID2Mask);
 
   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
@@ -283,9 +284,6 @@
       ResourceRef Pipe = selectPipe(R.first);
       use(Pipe);
       BusyResources[Pipe] += CS.size();
-      // Replace the resource mask with a valid processor resource index.
-      const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)];
-      Pipe.first = RS.getProcResourceID();
       Pipes.emplace_back(std::pair<ResourceRef, ResourceCycles>(
           Pipe, ResourceCycles(CS.size())));
     } else {
diff --git a/lib/MCA/InstrBuilder.cpp b/lib/MCA/InstrBuilder.cpp
index 2cfe154..d2d65e5 100644
--- a/lib/MCA/InstrBuilder.cpp
+++ b/lib/MCA/InstrBuilder.cpp
@@ -31,6 +31,8 @@
                            const llvm::MCInstrAnalysis *mcia)
     : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
       FirstReturnInst(true) {
+  const MCSchedModel &SM = STI.getSchedModel();
+  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
   computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
 }
 
@@ -178,8 +180,8 @@
 
   LLVM_DEBUG({
     for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
-      dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", " <<
-                "cy=" << R.second.size() << '\n';
+      dbgs() << "\t\tMask=" << format_hex(R.first, 16) << ", "
+             << "cy=" << R.second.size() << '\n';
     for (const uint64_t R : ID.Buffers)
       dbgs() << "\t\tBuffer Mask=" << format_hex(R, 16) << '\n';
   });
@@ -525,6 +527,9 @@
         MCI);
   }
 
+  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
+  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
+
   // Create a new empty descriptor.
   std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
   ID->NumMicroOps = SCDesc.NumMicroOps;
@@ -559,9 +564,6 @@
   populateWrites(*ID, MCI, SchedClassID);
   populateReads(*ID, MCI, SchedClassID);
 
-#ifndef NDEBUG
-  ID->Name = MCII.getName(Opcode);
-#endif
   LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
   LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
 
diff --git a/lib/MCA/Pipeline.cpp b/lib/MCA/Pipeline.cpp
index fd97ea6..4c0e37c 100644
--- a/lib/MCA/Pipeline.cpp
+++ b/lib/MCA/Pipeline.cpp
@@ -83,13 +83,13 @@
 }
 
 void Pipeline::notifyCycleBegin() {
-  LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n');
+  LLVM_DEBUG(dbgs() << "\n[E] Cycle begin: " << Cycles << '\n');
   for (HWEventListener *Listener : Listeners)
     Listener->onCycleBegin();
 }
 
 void Pipeline::notifyCycleEnd() {
-  LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n");
+  LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n");
   for (HWEventListener *Listener : Listeners)
     Listener->onCycleEnd();
 }
diff --git a/lib/MCA/Stages/ExecuteStage.cpp b/lib/MCA/Stages/ExecuteStage.cpp
index 17f7ff7..e783277 100644
--- a/lib/MCA/Stages/ExecuteStage.cpp
+++ b/lib/MCA/Stages/ExecuteStage.cpp
@@ -57,6 +57,7 @@
   HWS.issueInstruction(IR, Used, Ready);
 
   notifyReservedOrReleasedBuffers(IR, /* Reserved */ false);
+
   notifyInstructionIssued(IR, Used);
   if (IR.getInstruction()->isExecuted()) {
     notifyInstructionExecuted(IR);
@@ -184,7 +185,7 @@
 
 void ExecuteStage::notifyInstructionIssued(
     const InstRef &IR,
-    ArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
+    MutableArrayRef<std::pair<ResourceRef, ResourceCycles>> Used) const {
   LLVM_DEBUG({
     dbgs() << "[E] Instruction Issued: #" << IR << '\n';
     for (const std::pair<ResourceRef, ResourceCycles> &Resource : Used) {
@@ -193,6 +194,11 @@
       dbgs() << "cycles: " << Resource.second << '\n';
     }
   });
+
+  // Replace resource masks with valid resource processor IDs.
+  for (std::pair<ResourceRef, ResourceCycles> &Use : Used)
+    Use.first.first = HWS.getResourceID(Use.first.first);
+
   notifyEvent<HWInstructionEvent>(HWInstructionIssuedEvent(IR, Used));
 }
 
diff --git a/lib/MCA/Support.cpp b/lib/MCA/Support.cpp
index 3271bc6..335953e 100644
--- a/lib/MCA/Support.cpp
+++ b/lib/MCA/Support.cpp
@@ -19,13 +19,18 @@
 namespace llvm {
 namespace mca {
 
+#define DEBUG_TYPE "llvm-mca"
+
 void computeProcResourceMasks(const MCSchedModel &SM,
-                              SmallVectorImpl<uint64_t> &Masks) {
+                              MutableArrayRef<uint64_t> Masks) {
   unsigned ProcResourceID = 0;
 
+  assert(Masks.size() == SM.getNumProcResourceKinds() &&
+         "Invalid number of elements");
+  // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
+  Masks[0] = 0;
+
   // Create a unique bitmask for every processor resource unit.
-  // Skip resource at index 0, since it always references 'InvalidUnit'.
-  Masks.resize(SM.getNumProcResourceKinds());
   for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
     const MCProcResourceDesc &Desc = *SM.getProcResource(I);
     if (Desc.SubUnitsIdxBegin)
@@ -46,6 +51,16 @@
     }
     ProcResourceID++;
   }
+
+#ifndef NDEBUG
+  LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
+                    << "\n");
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    LLVM_DEBUG(dbgs() << '[' << I << "] " << Desc.Name << " - " << Masks[I]
+                      << '\n');
+  }
+#endif
 }
 
 double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
diff --git a/tools/llvm-mca/Views/SummaryView.cpp b/tools/llvm-mca/Views/SummaryView.cpp
index 3a7eb82..d8ac709 100644
--- a/tools/llvm-mca/Views/SummaryView.cpp
+++ b/tools/llvm-mca/Views/SummaryView.cpp
@@ -27,7 +27,8 @@
                          unsigned Width)
     : SM(Model), Source(S), DispatchWidth(Width), LastInstructionIdx(0),
       TotalCycles(0), NumMicroOps(0),
-      ProcResourceUsage(Model.getNumProcResourceKinds(), 0) {
+      ProcResourceUsage(Model.getNumProcResourceKinds(), 0),
+      ProcResourceMasks(Model.getNumProcResourceKinds()) {
   computeProcResourceMasks(SM, ProcResourceMasks);
 }