blob: b134f6c83a0dfe21fbe397aa6faf3f9b93fdb02d [file] [log] [blame]
//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` will benchmark each autogenerated memory
// function against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks the
// median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores them from worst (0)
// to best (1).
// - Each distribution categorizes each function into one of the following
// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
// BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
// the best function. The histogram of grades is returned so we can
// distinguish between functions with the same final grade. In the following
// example both functions grade EXCELLENT but we may prefer the second one.
//
// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 | 7 | 1 | 2 | | ...
// | Function_2 | 6 | 4 | | | ...
#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include <cmath>
#include <numeric>
#include <unordered_map>
namespace llvm {
namespace automemcpy {
// Returns the textual name of the grade `GE`.
//
// `ARRAY_SIZE` and any value that does not match a named grade are not valid
// inputs: they terminate the program through `report_fatal_error`.
StringRef Grade::getString(const GradeEnum &GE) {
  // No `default:` label on purpose, so the compiler can still warn when a new
  // enumerator is added without a matching case.
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    break;
  }
  // Reached for ARRAY_SIZE and for out-of-range values. Reporting here, after
  // the switch, guarantees control never flows off the end of this non-void
  // function.
  report_fatal_error("logic error");
}
// Maps a score to a grade by splitting the score axis into seven bands of
// width 1/7. A score at or above 6/7 is EXCELLENT, at or above 5/7 is
// VERY_GOOD, and so on; anything that reaches none of the lower bounds
// (including NaN, for which every comparison is false) is BAD.
Grade::GradeEnum Grade::judge(double Score) {
  struct Band {
    double LowerBound;
    GradeEnum Result;
  };
  // Ordered from the highest lower bound to the lowest so that the first match
  // is the best grade the score qualifies for.
  static constexpr Band Bands[] = {
      {6. / 7, EXCELLENT},  {5. / 7, VERY_GOOD}, {4. / 7, GOOD},
      {3. / 7, PASSABLE},   {2. / 7, INADEQUATE}, {1. / 7, MEDIOCRE},
  };
  for (const Band &B : Bands)
    if (Score >= B.LowerBound)
      return B.Result;
  return BAD;
}
// Computes the unbiased sample variance of `Samples` around `SampleMean`:
// the sum of squared deviations divided by (N - 1).
//
// `Samples` must not be empty (checked with an assertion). A single sample has
// no spread, so the result is 0 in that case.
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  const size_t Count = Samples.size();
  if (Count == 1)
    return 0;
  // Left-to-right fold of the squared deviations from the mean.
  const double SquaredDeviations =
      std::accumulate(Samples.begin(), Samples.end(), 0.0,
                      [SampleMean](double Acc, double Value) {
                        const double Deviation = Value - SampleMean;
                        return Acc + Deviation * Deviation;
                      });
  return SquaredDeviations / (Count - 1);
}
// Fills the mean, variance and median fields of `Data` from its
// `BytesPerSecondSamples`, which must not be empty (checked with an
// assertion).
//
// Side effect: the samples are partially reordered in place by
// `std::nth_element`. For an even number of samples the median is the element
// at index N / 2 of the sorted order, i.e. the upper of the two middle values.
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Throughputs = Data.BytesPerSecondSamples;
  assert(!Throughputs.empty());
  const size_t Count = Throughputs.size();

  // Mean of the samples.
  const double Total =
      std::accumulate(Throughputs.begin(), Throughputs.end(), 0.0);
  Data.BytesPerSecondMean = Total / Count;

  // Unbiased variance around that mean.
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Throughputs, Data.BytesPerSecondMean);

  // Median: a partial sort is enough to put the middle element in place.
  const auto MedianIt = Throughputs.begin() + Count / 2;
  std::nth_element(Throughputs.begin(), MedianIt, Throughputs.end());
  Data.BytesPerSecondMedian = *MedianIt;
}
// Groups the throughput of `Samples` by function and then by distribution
// name, and computes the mean, variance and median of each group.
//
// Only samples whose type is `SampleType::ITERATION` contribute; every other
// sample is skipped. Returns one `FunctionData` per distinct function, with
// its `Id` set. The order of the returned vector follows the iteration order
// of an `std::unordered_map` and is therefore unspecified.
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    // Skip instead of stopping: non-iteration samples may be interleaved with
    // iteration samples (e.g. when several runs are concatenated), and
    // stopping at the first one would silently drop everything after it.
    if (S.Type != SampleType::ITERATION)
      continue;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }
  std::vector<FunctionData> Output;
  Output.reserve(Functions.size());
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    // The map is discarded right after this loop, so its values can be moved.
    Output.push_back(std::move(Function));
  }
  return Output;
}
// Computes, for every function and every distribution, a score obtained by
// min-max normalizing the median throughput among all functions of the same
// `FunctionType` on the same distribution: the slowest gets 0 and the fastest
// gets 1.
//
// When all candidates of a bucket share the same median throughput (which
// includes the case of a single candidate) there is no range to normalize
// against; the score is then 1 since no candidate does better.
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;
    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };
  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // `lowest()` is the most negative finite double. `min()` would be the
    // smallest *positive* one and is not a valid seed for a running maximum.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    // Maps `Value` from [Min, Max] to [0, 1].
    double normalize(double Value) const {
      const double Range = Max - Min;
      // Degenerate bucket (all values equal): avoid a 0/0 division that would
      // yield NaN.
      if (!(Range > 0))
        return 1.0;
      return (Value - Min) / Range;
    }
  };
  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  // First pass: collect the throughput range of every bucket.
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }
  // Second pass: normalize each throughput within its bucket.
  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const Key K{Type, Distribution};
      const auto It = ThroughputMinMax.find(K);
      assert(It != ThroughputMinMax.end() && "bucket filled by first pass");
      auto &Data = Pair.getValue();
      // Write through the entry being visited rather than looking the key up
      // again in the map that is being iterated.
      Data.Score = It->second.normalize(Data.BytesPerSecondMedian);
    }
  }
}
// Grades every function from its per-distribution scores.
//
// For each function:
// - each distribution's score is turned into a grade with `Grade::judge`; the
//   grade is stored on the distribution and counted in `GradeHisto`,
// - `ScoresGeoMean` is set to the geometric mean of the scores (1 when the
//   function has no distribution),
// - `FinalGrade` is set to the median grade of the histogram, walking it from
//   index 0 upward; with an even number of votes the later of the two middle
//   grades is chosen, and with no vote at all the grade is `Grade::BAD`.
//
// The histogram is incremented, not reset, so this is meant to be called once
// per set of functions.
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    double ScoresProduct = 1.0;
    size_t ScoresCount = 0;
    for (auto &Pair : Function.PerDistributionData) {
      auto &Data = Pair.getValue();
      ScoresProduct *= Data.Score;
      ++ScoresCount;
      const Grade::GradeEnum G = Grade::judge(Data.Score);
      ++(Function.GradeHisto[G]);
      Data.Grade = G;
    }
    // A geometric mean is the N-th root of the product of N values; keeping
    // the raw product would penalize functions that have more distributions.
    Function.ScoresGeoMean =
        ScoresCount == 0 ? 1.0 : std::pow(ScoresProduct, 1.0 / ScoresCount);

    const auto &GradeHisto = Function.GradeHisto;
    // Accumulate in size_t so the sum has the same width as the counters.
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), size_t{0});
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    // The median grade is the first one at which the running count of votes
    // exceeds half of the total.
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}
} // namespace automemcpy
} // namespace llvm