blob: 932cbba2038e4cfeb3196a67e44dfcb764921f09 [file] [log] [blame]
//===-- Clustering.h --------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Utilities to compute benchmark result clusters.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
#include "BenchmarkResult.h"
#include "llvm/Support/Error.h"
#include <vector>
namespace llvm {
namespace exegesis {
class InstructionBenchmarkClustering {
public:
// Clusters `Points` using DBSCAN with the given parameters. See the cc file
// for more explanations on the algorithm.
static llvm::Expected<InstructionBenchmarkClustering>
create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
double Epsilon);
class ClusterId {
public:
static ClusterId noise() { return ClusterId(kNoise); }
static ClusterId error() { return ClusterId(kError); }
static ClusterId makeValid(size_t Id) { return ClusterId(Id); }
ClusterId() : Id_(kUndef) {}
bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
bool operator<(const ClusterId &O) const { return Id_ < O.Id_; }
bool isValid() const { return Id_ <= kMaxValid; }
bool isUndef() const { return Id_ == kUndef; }
bool isNoise() const { return Id_ == kNoise; }
bool isError() const { return Id_ == kError; }
// Precondition: isValid().
size_t getId() const {
assert(isValid());
return Id_;
}
private:
explicit ClusterId(size_t Id) : Id_(Id) {}
static constexpr const size_t kMaxValid =
std::numeric_limits<size_t>::max() - 4;
static constexpr const size_t kNoise = kMaxValid + 1;
static constexpr const size_t kError = kMaxValid + 2;
static constexpr const size_t kUndef = kMaxValid + 3;
size_t Id_;
};
struct Cluster {
Cluster() = delete;
explicit Cluster(const ClusterId &Id) : Id(Id) {}
const ClusterId Id;
// Indices of benchmarks within the cluster.
std::vector<int> PointIndices;
};
ClusterId getClusterIdForPoint(size_t P) const {
return ClusterIdForPoint_[P];
}
const std::vector<InstructionBenchmark> &getPoints() const { return Points_; }
const Cluster &getCluster(ClusterId Id) const {
assert(!Id.isUndef() && "unlabeled cluster");
if (Id.isNoise()) {
return NoiseCluster_;
}
if (Id.isError()) {
return ErrorCluster_;
}
return Clusters_[Id.getId()];
}
const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
// Returns true if the given point is within a distance Epsilon of each other.
bool isNeighbour(const std::vector<BenchmarkMeasure> &P,
const std::vector<BenchmarkMeasure> &Q) const {
double DistanceSquared = 0.0;
for (size_t I = 0, E = P.size(); I < E; ++I) {
const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue;
DistanceSquared += Diff * Diff;
}
return DistanceSquared <= EpsilonSquared_;
}
private:
InstructionBenchmarkClustering(
const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
llvm::Error validateAndSetup();
void dbScan(size_t MinPts);
void rangeQuery(size_t Q, std::vector<size_t> &Scratchpad) const;
const std::vector<InstructionBenchmark> &Points_;
const double EpsilonSquared_;
int NumDimensions_ = 0;
// ClusterForPoint_[P] is the cluster id for Points[P].
std::vector<ClusterId> ClusterIdForPoint_;
std::vector<Cluster> Clusters_;
Cluster NoiseCluster_;
Cluster ErrorCluster_;
};
} // namespace exegesis
} // namespace llvm
#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H