[TargetInstrInfo] Add target hook for InstrSchedModel latency. [NFCI] These hooks already exist when using instruction itineraries for latency info; this patch adds them for the newer TargetSchedModel. This allows targets to dynamically set latency values in the DAG builder, which is useful in multi-pass schedulers like the one in the AMDGPU backend, where we may want to schedule a region multiple times with a different machine model or tweaked latencies for a specific instruction type.

commit: 450737fba25203c8dfeca036925e9af9d6c22f0a [log] [tgz]
author: Austin Kerbow <Austin.Kerbow@amd.com> Tue Feb 25 19:37:46 2025 -0800
committer: Austin Kerbow <Austin.Kerbow@amd.com> Mon Sep 15 22:51:53 2025 -0700
tree: 06789ed2bc773275a2886d205e93b37b4c54e0e1
parent: 64dba812a3a8fc86b4ddbf34ad5bc5b5329cfca8 [diff]
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 6a624a7..6589f85 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h

@@ -1848,6 +1848,13 @@
                     const MachineInstr &DefMI, unsigned DefIdx,
                     const MachineInstr &UseMI, unsigned UseIdx) const;
 
+  /// Compute the latency of a register data dependence (DefIdx -> UseIdx)
+  /// using the TargetSchedModel.
+  virtual std::optional<unsigned>
+  getOperandLatency(const TargetSchedModel &SchedModel,
+                    const MachineInstr *DefMI, unsigned DefIdx,
+                    const MachineInstr *UseMI, unsigned UseIdx) const;
+
   /// Compute the instruction latency of a given instruction.
   /// If the instruction has higher cost when predicated, it's returned via
   /// PredCost.
@@ -1855,6 +1862,11 @@
                                    const MachineInstr &MI,
                                    unsigned *PredCost = nullptr) const;
 
+  /// Compute the instruction latency using the TargetSchedModel.
+  virtual std::optional<unsigned>
+  getInstrLatency(const TargetSchedModel &TargetSchedModel,
+                  const MachineInstr &MI) const;
+
   virtual unsigned getPredicationCost(const MachineInstr &MI) const;
 
   virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,

diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index 74f8ed5..b033ed6 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h

@@ -44,8 +44,6 @@
   // Resource units per cycle. Latency normalization factor.
   unsigned ResourceLCM = 0;
 
-  unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
-
   // EnableSchedModel and EnableSchedItins are used to control whether or not to
   // use the Target's {SchedMachineModel, InstrItins} for hardware infor based
   // Scheduling decisions. If both are enabled, as is the default, preference
@@ -203,6 +201,7 @@
                                         bool UseDefaultDefLatency = true) const;
   LLVM_ABI unsigned computeInstrLatency(const MCInst &Inst) const;
   LLVM_ABI unsigned computeInstrLatency(unsigned Opcode) const;
+  LLVM_ABI unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
 
   /// Output dependency latency of a pair of defs of the same register.
   ///

diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index b0009560..70c9d86 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp

@@ -1830,6 +1830,85 @@
   return ItinData->getStageLatency(MI.getDesc().getSchedClass());
 }
 
+std::optional<unsigned>
+TargetInstrInfo::getInstrLatency(const TargetSchedModel &TargetSchedModel,
+                                 const MachineInstr &MI) const {
+  if (TargetSchedModel.hasInstrSchedModel()) { // Else: no model to consult.
+    const MCSchedClassDesc *SCDesc = TargetSchedModel.resolveSchedClass(&MI);
+    if (SCDesc->isValid()) // Invalid class falls through to std::nullopt.
+      return TargetSchedModel.computeInstrLatency(*SCDesc);
+  }
+
+  return std::nullopt; // No latency could be taken from the sched model.
+}
+
+std::optional<unsigned> TargetInstrInfo::getOperandLatency(const TargetSchedModel &SchedModel,
+                                            const MachineInstr *DefMI,
+                                            unsigned DefOperIdx,
+                                            const MachineInstr *UseMI,
+                                            unsigned UseOperIdx) const {
+  // Base implementation: derive the def->use latency from the instruction
+  // scheduling model. With no such model, return std::nullopt to the caller.
+  if (!SchedModel.hasInstrSchedModel())
+    return std::nullopt;
+
+  const MCSchedClassDesc *SCDesc = SchedModel.resolveSchedClass(DefMI);
+  if (!SCDesc->isValid()) // No usable sched class for DefMI.
+    return std::nullopt;
+
+  // DefIdx = number of register defs among operands before DefOperIdx.
+  unsigned DefIdx = 0;
+  for (unsigned I = 0; I != DefOperIdx; ++I) {
+    const MachineOperand &MO = DefMI->getOperand(I);
+    if (MO.isReg() && MO.isDef())
+      ++DefIdx;
+  }
+  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
+    // Look up the def's write latency; negative Cycles are treated as 1000.
+    const TargetSubtargetInfo *STI = SchedModel.getSubtargetInfo();
+    const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, DefIdx);
+    unsigned WriteID = WLEntry->WriteResourceID;
+    unsigned Latency = WLEntry->Cycles >= 0 ? static_cast<unsigned>(WLEntry->Cycles) : 1000u;
+    if (!UseMI) // No consumer given: return the raw write latency.
+      return Latency;
+
+    // Look up the use's read-advance adjustment in SubtargetInfo.
+    const MCSchedClassDesc *UseDesc = SchedModel.resolveSchedClass(UseMI);
+    if (UseDesc->NumReadAdvanceEntries == 0)
+      return Latency;
+    // UseIdx = number of read, non-def register operands before UseOperIdx.
+    unsigned UseIdx = 0;
+    for (unsigned I = 0; I != UseOperIdx; ++I) {
+      const MachineOperand &MO = UseMI->getOperand(I);
+      if (MO.isReg() && MO.readsReg() && !MO.isDef())
+        ++UseIdx;
+    }
+    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+    if (Advance > 0 && static_cast<unsigned>(Advance) > Latency) // unsigned wrap
+      return 0;
+    return Latency - Advance;
+  }
+
+  // DefIdx is not in the model (e.g. implicit defs). With a complete model,
+  // an explicit, non-optional def here is a model bug: report it (debug only).
+#ifndef NDEBUG
+  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
+      !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
+      SchedModel.getMCSchedModel()->isComplete()) {
+    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
+           << *DefMI
+           << " (Try with MCSchedModel.CompleteModel set to false)";
+    llvm_unreachable("incomplete machine model");
+  }
+#endif
+
+  // FIXME: Automatically giving all implicit defs defaultDefLatency is
+  // undesirable. We should only do it for defs that are known to the MC
+  // desc like flags. Truly implicit defs should get 1 cycle latency.
+  const MCSchedModel *MCSM = SchedModel.getMCSchedModel();
+  return DefMI->isTransient() ? 0 : defaultDefLatency(*MCSM, *DefMI);
+}
+
 bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
                                        const MachineInstr &DefMI,
                                        unsigned DefIdx) const {

diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp
index 7ae9e0e..210f3e2 100644
--- a/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/llvm/lib/CodeGen/TargetSchedule.cpp

@@ -134,37 +134,6 @@
   return SCDesc;
 }
 
-/// Find the def index of this operand. This index maps to the machine model and
-/// is independent of use operands. Def operands may be reordered with uses or
-/// merged with uses without affecting the def index (e.g. before/after
-/// regalloc). However, an instruction's def operands must never be reordered
-/// with respect to each other.
-static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) {
-  unsigned DefIdx = 0;
-  for (unsigned i = 0; i != DefOperIdx; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isDef())
-      ++DefIdx;
-  }
-  return DefIdx;
-}
-
-/// Find the use index of this operand. This is independent of the instruction's
-/// def operands.
-///
-/// Note that uses are not determined by the operand's isUse property, which
-/// is simply the inverse of isDef. Here we consider any readsReg operand to be
-/// a "use". The machine model allows an operand to be both a Def and Use.
-static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
-  unsigned UseIdx = 0;
-  for (unsigned i = 0; i != UseOperIdx; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.readsReg() && !MO.isDef())
-      ++UseIdx;
-  }
-  return UseIdx;
-}
-
 // Top-level API for clients that know the operand indices. This doesn't need to
 // return std::optional<unsigned>, as it always returns a valid latency.
 unsigned TargetSchedModel::computeOperandLatency(
@@ -177,8 +146,8 @@
   if (!hasInstrSchedModel() && !hasInstrItineraries())
     return DefaultDefLatency;
 
+  std::optional<unsigned> OperLatency;
   if (hasInstrItineraries()) {
-    std::optional<unsigned> OperLatency;
     if (UseMI) {
       OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx,
                                            *UseMI, UseOperIdx);
@@ -195,41 +164,11 @@
   }
 
   // hasInstrSchedModel()
-  const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
-  unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
-  if (DefIdx < SCDesc->NumWriteLatencyEntries) {
-    // Lookup the definition's write latency in SubtargetInfo.
-    const MCWriteLatencyEntry *WLEntry =
-      STI->getWriteLatencyEntry(SCDesc, DefIdx);
-    unsigned WriteID = WLEntry->WriteResourceID;
-    unsigned Latency = capLatency(WLEntry->Cycles);
-    if (!UseMI)
-      return Latency;
+  OperLatency =
+      TII->getOperandLatency(*this, DefMI, DefOperIdx, UseMI, UseOperIdx);
+  if (OperLatency)
+    return *OperLatency;
 
-    // Lookup the use's latency adjustment in SubtargetInfo.
-    const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI);
-    if (UseDesc->NumReadAdvanceEntries == 0)
-      return Latency;
-    unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
-    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
-    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
-      return 0;
-    return Latency - Advance;
-  }
-  // If DefIdx does not exist in the model (e.g. implicit defs), then return
-  // unit latency (defaultDefLatency may be too conservative).
-#ifndef NDEBUG
-  if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() &&
-      !DefMI->getDesc().operands()[DefOperIdx].isOptionalDef() &&
-      SchedModel.isComplete()) {
-    errs() << "DefIdx " << DefIdx << " exceeds machine model writes for "
-           << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)";
-    llvm_unreachable("incomplete machine model");
-  }
-#endif
-  // FIXME: Automatically giving all implicit defs defaultDefLatency is
-  // undesirable. We should only do it for defs that are known to the MC
-  // desc like flags. Truly implicit defs should get 1 cycle latency.
   return DefMI->isTransient() ? 0 : DefaultDefLatency;
 }
 
@@ -259,12 +198,11 @@
       (!hasInstrSchedModel() && !UseDefaultDefLatency))
     return TII->getInstrLatency(&InstrItins, *MI);
 
-  if (hasInstrSchedModel()) {
-    const MCSchedClassDesc *SCDesc = resolveSchedClass(MI);
-    if (SCDesc->isValid())
-      return computeInstrLatency(*SCDesc);
-  }
-  return TII->defaultDefLatency(SchedModel, *MI);
+  std::optional<unsigned> InstrLatency;
+  // This is used by subtargets that define an InstrSchedModel.
+  InstrLatency = TII->getInstrLatency(*this, *MI);
+
+  return InstrLatency ? *InstrLatency : TII->defaultDefLatency(SchedModel, *MI);
 }
 
 unsigned TargetSchedModel::
commit	450737fba25203c8dfeca036925e9af9d6c22f0a	[log] [tgz]
author	Austin Kerbow <Austin.Kerbow@amd.com>	Tue Feb 25 19:37:46 2025 -0800
committer	Austin Kerbow <Austin.Kerbow@amd.com>	Mon Sep 15 22:51:53 2025 -0700
tree	06789ed2bc773275a2886d205e93b37b4c54e0e1
parent	64dba812a3a8fc86b4ddbf34ad5bc5b5329cfca8 [diff]