| //===-- Clustering.h --------------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// Utilities to compute benchmark result clusters. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |
| #define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |
| |
| #include "BenchmarkResult.h" |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/Support/Error.h" |
| #include <limits> |
| #include <vector> |
| |
| namespace llvm { |
| namespace exegesis { |
| |
| class InstructionBenchmarkClustering { |
| public: |
| enum ModeE { Dbscan, Naive }; |
| |
| // Clusters `Points` using DBSCAN with the given parameters. See the cc file |
| // for more explanations on the algorithm. |
| static Expected<InstructionBenchmarkClustering> |
| create(const std::vector<InstructionBenchmark> &Points, ModeE Mode, |
| size_t DbscanMinPts, double AnalysisClusteringEpsilon, |
| const MCSubtargetInfo *SubtargetInfo = nullptr, |
| const MCInstrInfo *InstrInfo = nullptr); |
| |
| class ClusterId { |
| public: |
| static ClusterId noise() { return ClusterId(kNoise); } |
| static ClusterId error() { return ClusterId(kError); } |
| static ClusterId makeValid(size_t Id, bool IsUnstable = false) { |
| return ClusterId(Id, IsUnstable); |
| } |
| static ClusterId makeValidUnstable(size_t Id) { |
| return makeValid(Id, /*IsUnstable=*/true); |
| } |
| |
| ClusterId() : Id_(kUndef), IsUnstable_(false) {} |
| |
| // Compare id's, ignoring the 'unstability' bit. |
| bool operator==(const ClusterId &O) const { return Id_ == O.Id_; } |
| bool operator<(const ClusterId &O) const { return Id_ < O.Id_; } |
| |
| bool isValid() const { return Id_ <= kMaxValid; } |
| bool isUnstable() const { return IsUnstable_; } |
| bool isNoise() const { return Id_ == kNoise; } |
| bool isError() const { return Id_ == kError; } |
| bool isUndef() const { return Id_ == kUndef; } |
| |
| // Precondition: isValid(). |
| size_t getId() const { |
| assert(isValid()); |
| return Id_; |
| } |
| |
| private: |
| ClusterId(size_t Id, bool IsUnstable = false) |
| : Id_(Id), IsUnstable_(IsUnstable) {} |
| |
| static constexpr const size_t kMaxValid = |
| (std::numeric_limits<size_t>::max() >> 1) - 4; |
| static constexpr const size_t kNoise = kMaxValid + 1; |
| static constexpr const size_t kError = kMaxValid + 2; |
| static constexpr const size_t kUndef = kMaxValid + 3; |
| |
| size_t Id_ : (std::numeric_limits<size_t>::digits - 1); |
| size_t IsUnstable_ : 1; |
| }; |
| static_assert(sizeof(ClusterId) == sizeof(size_t), "should be a bit field."); |
| |
| struct Cluster { |
| Cluster() = delete; |
| explicit Cluster(const ClusterId &Id) : Id(Id) {} |
| |
| const ClusterId Id; |
| // Indices of benchmarks within the cluster. |
| std::vector<int> PointIndices; |
| }; |
| |
| ClusterId getClusterIdForPoint(size_t P) const { |
| return ClusterIdForPoint_[P]; |
| } |
| |
| const std::vector<InstructionBenchmark> &getPoints() const { return Points_; } |
| |
| const Cluster &getCluster(ClusterId Id) const { |
| assert(!Id.isUndef() && "unlabeled cluster"); |
| if (Id.isNoise()) { |
| return NoiseCluster_; |
| } |
| if (Id.isError()) { |
| return ErrorCluster_; |
| } |
| return Clusters_[Id.getId()]; |
| } |
| |
| const std::vector<Cluster> &getValidClusters() const { return Clusters_; } |
| |
| // Returns true if the given point is within a distance Epsilon of each other. |
| bool isNeighbour(const std::vector<BenchmarkMeasure> &P, |
| const std::vector<BenchmarkMeasure> &Q, |
| const double EpsilonSquared_) const { |
| double DistanceSquared = 0.0; |
| for (size_t I = 0, E = P.size(); I < E; ++I) { |
| const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; |
| DistanceSquared += Diff * Diff; |
| } |
| return DistanceSquared <= EpsilonSquared_; |
| } |
| |
| private: |
| InstructionBenchmarkClustering( |
| const std::vector<InstructionBenchmark> &Points, |
| double AnalysisClusteringEpsilonSquared); |
| |
| Error validateAndSetup(); |
| |
| void clusterizeDbScan(size_t MinPts); |
| void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo, |
| const MCInstrInfo &InstrInfo); |
| |
| // Stabilization is only needed if dbscan was used to clusterize. |
| void stabilize(unsigned NumOpcodes); |
| |
| void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const; |
| |
| bool areAllNeighbours(ArrayRef<size_t> Pts) const; |
| |
| const std::vector<InstructionBenchmark> &Points_; |
| const double AnalysisClusteringEpsilonSquared_; |
| |
| int NumDimensions_ = 0; |
| // ClusterForPoint_[P] is the cluster id for Points[P]. |
| std::vector<ClusterId> ClusterIdForPoint_; |
| std::vector<Cluster> Clusters_; |
| Cluster NoiseCluster_; |
| Cluster ErrorCluster_; |
| }; |
| |
| class SchedClassClusterCentroid { |
| public: |
| const std::vector<PerInstructionStats> &getStats() const { |
| return Representative; |
| } |
| |
| std::vector<BenchmarkMeasure> getAsPoint() const; |
| |
| void addPoint(ArrayRef<BenchmarkMeasure> Point); |
| |
| bool validate(InstructionBenchmark::ModeE Mode) const; |
| |
| private: |
| // Measurement stats for the points in the SchedClassCluster. |
| std::vector<PerInstructionStats> Representative; |
| }; |
| |
| } // namespace exegesis |
| } // namespace llvm |
| |
| #endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H |