//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the JSON files produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against each of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more JSON files.
// - If there are several runs for the same function and distribution, picks the
//   median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores each function from
//   worst (0) to best (1).
// - For each distribution, the resulting score maps each function to one of
//   the following grades: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE,
//   MEDIOCRE, BAD.
// - A process similar to the Majority Judgment voting system is used to `elect`
//   the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example both functions grade EXCELLENT but we may prefer the second one.
//
//  |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
//  |------------|-----------|-----------|------|----------| ...
//  | Function_1 |         7 |         1 |    2 |          | ...
//  | Function_2 |         6 |         4 |      |          | ...
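//
// For illustration (numbers made up): if, for a given function type and
// distribution, the median throughputs range from 2 GB/s (worst) to 10 GB/s
// (best), a function reaching 9 GB/s gets a score of (9 - 2) / (10 - 2) =
// 0.875, which is above 6/7 and therefore grades as EXCELLENT.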

#include "automemcpy/ResultAnalyzer.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <unordered_map>
#include <vector>

namespace llvm {

namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
}

Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}

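// Returns the unbiased sample variance of `Samples`, i.e.
// sum((S - SampleMean)^2) / (N - 1), an estimate of the variance of the
// underlying throughput distribution from the collected runs.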
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
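  // Note: for an even number of samples this picks the upper of the two middle
  // values rather than their average.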
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}

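// Groups per-iteration samples by function and distribution, then computes the
// mean, unbiased variance and median throughput of each bucket.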
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}

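// For each (function type, distribution) bucket, records the minimum and
// maximum median throughputs, then assigns every function a min-max normalized
// score in [0, 1] for that distribution.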
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
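    // Maps Value linearly onto [0, 1]: 0 for the worst observed throughput and
    // 1 for the best. Assumes Max > Min, i.e. at least two distinct values.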
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}

void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
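    // Accumulate the product of the per-distribution scores. Assuming every
    // function is scored against the same set of distributions, ordering by
    // this product matches ordering by the geometric mean (the n-th root is
    // omitted).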
    Function.ScoresGeoMean = 1.0;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }

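  // Majority Judgment: the final grade is the median grade, i.e. the grade of
  // the middle vote once votes are counted from best (EXCELLENT) to worst
  // (BAD).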
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

} // namespace automemcpy
} // namespace llvm