//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the JSON files produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against each of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more JSON files.
// - If there are several runs for the same function and distribution, picks the
//   median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores each function from
//   worst (0) to best (1).
// - For each distribution, the resulting score maps each function to one of
//   the following grades: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE,
//   MEDIOCRE, BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
//   the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example both functions grade EXCELLENT but we may prefer the second one.
//
//  |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
//  |------------|-----------|-----------|------|----------| ...
//  | Function_1 |         7 |         1 |    2 |          | ...
//  | Function_2 |         6 |         4 |      |          | ...
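//
// For illustration (numbers made up): if, for a given function type and
// distribution, the median throughputs range from 2 GB/s (worst) to 10 GB/s
// (best), a function reaching 9 GB/s gets a score of (9 - 2) / (10 - 2) =
// 0.875, which is above 6/7 and therefore grades as EXCELLENT.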

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <unordered_map>
#include <vector>

namespace llvm {

namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}

Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}

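// Returns the unbiased sample variance of `Samples`, i.e.
// sum((S - SampleMean)^2) / (N - 1), an estimate of the variance of the
// underlying throughput distribution from the collected runs.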
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
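  // Note: for an even number of samples this picks the upper of the two middle
  // values rather than their average.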
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}

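// Groups per-iteration samples by function and distribution, then computes the
// mean, unbiased variance and median throughput of each bucket.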
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}

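// For each (function type, distribution) bucket, records the minimum and
// maximum median throughputs, then assigns every function a min-max normalized
// score in [0, 1] for that distribution.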
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
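    // Maps Value linearly onto [0, 1]: 0 for the worst observed throughput and
    // 1 for the best. Assumes Max > Min, i.e. at least two distinct values.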
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}

void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
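    // Accumulate the product of the per-distribution scores. Assuming every
    // function is scored against the same set of distributions, ordering by
    // this product matches ordering by the geometric mean (the n-th root is
    // omitted).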
    Function.ScoresGeoMean = 1.0;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }

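  // Majority Judgment: the final grade is the median grade, i.e. the grade of
  // the middle vote once votes are counted from best (EXCELLENT) to worst
  // (BAD).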
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm