blob: 00298f69f77f69e4df1704ee0a11aba6c5e95be6 [file] [log] [blame]
//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory
// function against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks the
// median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores each function from
// worst (0) to best (1).
// - Each distribution categorizes each function into one of the following
// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
// BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
// the best function. The histogram of grades is returned so we can
// distinguish between functions with the same final grade. In the following
// example both functions grade EXCELLENT but we may prefer the second one.
//
// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 | 7 | 1 | 2 | | ...
// | Function_2 | 6 | 4 | | | ...
#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include <numeric>
#include <unordered_map>
namespace llvm {
namespace automemcpy {
// Returns the textual representation of a grade. `ARRAY_SIZE` is a sentinel
// enumerator and must never be passed in.
StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case BAD:
    return "BAD";
  case MEDIOCRE:
    return "MEDIOCRE";
  case INADEQUATE:
    return "INADEQUATE";
  case PASSABLE:
    return "PASSABLE";
  case GOOD:
    return "GOOD";
  case VERY_GOOD:
    return "VERY_GOOD";
  case EXCELLENT:
    return "EXCELLENT";
  case ARRAY_SIZE:
    break; // Fall through to the fatal error below.
  }
  report_fatal_error("logic error");
}
// Maps a normalized score in [0, 1] to a discrete grade by slicing the range
// into seven equal buckets, best first. Scores below 1/7 — and NaN, which
// fails every `>=` comparison — grade BAD.
Grade::GradeEnum Grade::judge(double Score) {
  struct Cutoff {
    double Threshold;
    GradeEnum G;
  };
  // Same double constants as the original comparisons (i. / 7) so the
  // floating-point boundaries are bit-identical.
  static const Cutoff Cutoffs[] = {
      {6. / 7, EXCELLENT}, {5. / 7, VERY_GOOD},  {4. / 7, GOOD},
      {3. / 7, PASSABLE},  {2. / 7, INADEQUATE}, {1. / 7, MEDIOCRE},
  };
  for (const Cutoff &C : Cutoffs)
    if (Score >= C.Threshold)
      return C.G;
  return BAD;
}
// Collapses repeated benchmark runs of the same (function, distribution) pair
// into a single median throughput, and returns one FunctionData per function
// carrying its per-distribution medians.
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  // Bucket raw throughput values by sample id so several runs of the same
  // configuration can be reduced to one number.
  std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher>
      BucketedSamples;
  for (const auto &S : Samples)
    BucketedSamples[S.Id].push_back(S.BytesPerSecond);
  // Reduce each bucket to its median (the upper median for even-sized
  // buckets), which is more robust to outliers than the mean.
  std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
      Throughputs;
  for (auto &Bucket : BucketedSamples) {
    const auto &Id = Bucket.first;
    auto &Values = Bucket.second;
    const size_t HalfSize = Values.size() / 2;
    // nth_element is O(n) and only partially orders Values, which is all we
    // need to extract the median.
    std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end());
    const double MedianValue = Values[HalfSize];
    Throughputs[Id.Function][Id.Distribution.Name] = MedianValue;
  }
  // Flatten the nested map into the output representation. The inner loop
  // variable is deliberately not named like the outer one: the original code
  // shadowed `Pair` with a range expression referring to the outer `Pair`,
  // which was legal but easy to misread.
  std::vector<FunctionData> Output;
  for (auto &FunctionPair : Throughputs) {
    FunctionData Data;
    Data.Id = FunctionPair.first;
    for (const auto &DistPair : FunctionPair.second)
      Data.PerDistributionData[DistPair.getKey()].MedianBytesPerSecond =
          DistPair.getValue();
    Output.push_back(std::move(Data));
  }
  return Output;
}
// Scores are relative: each function is compared against all functions of the
// same type (e.g. all memcpy's) for a given distribution, and its throughput
// is normalized into [0, 1] within that group.
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;
    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };
  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // FIX: the original initialized Max with numeric_limits<double>::min(),
    // which is the smallest *positive* double — a wrong sentinel for a
    // maximum. lowest() is the most negative representable value.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const {
      // Guard the degenerate single-value bucket: without it the division
      // would be 0/0 (NaN) and the lone function would unfairly grade BAD.
      if (Max == Min)
        return 1.0;
      return (Value - Min) / (Max - Min);
    }
  };
  // First pass: gather the min/max throughput per (type, distribution).
  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().MedianBytesPerSecond;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }
  // Second pass: normalize each function's throughput within its bucket.
  // Mutate the StringMap entry in place rather than re-looking the key up.
  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      auto &Data = Pair.getValue();
      const Key K{Type, Distribution};
      Data.Score = ThroughputMinMax[K].normalize(Data.MedianBytesPerSecond);
    }
  }
}
// Turns each per-distribution score into a discrete grade, builds a histogram
// of grades per function, and elects each function's final grade as the
// median of its cast grades (Majority Judgment, per the file header).
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions)
    // Mutable iteration: write the Grade back into the entry directly instead
    // of paying a second hash lookup per distribution.
    for (auto &Pair : Function.PerDistributionData) {
      auto &Data = Pair.getValue();
      const auto G = Grade::judge(Data.Score);
      ++(Function.GradeHisto[G]);
      Data.Grade = G;
    }
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    // FIX: accumulate into size_t, not 0U — the original summed into
    // unsigned int, narrower than the size_t it was assigned to.
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), size_t(0));
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    // Walk the histogram in enum order; the first grade whose cumulative
    // count passes the median vote is the median grade.
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}
} // namespace automemcpy
} // namespace llvm