llvm/lib/Transforms/Utils/MisExpect.cpp - llvm-project - Git at Google

 //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This contains code to emit warnings for potentially incorrect usage of the
 // llvm.expect intrinsic. This utility extracts the threshold values from
 // metadata associated with the instrumented Branch or Switch instruction. The
 // threshold values are then used to determine if a warning should be emmited.
 //
 // MisExpect's implementation relies on two assumptions about how branch weights
 // are managed in LLVM.
 //
 // 1) Frontend profiling weights are always in place before llvm.expect is
 // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
 // needs to extract the branch weights and then compare them to the weights
 // being added by the llvm.expect intrinsic lowering.
 //
 // 2) Sampling and IR based profiles will *only* have branch weight metadata
 // before profiling data is consulted if they are from a lowered llvm.expect
 // intrinsic. These profiles thus always extract the expected weights and then
 // compare them to the weights collected during profiling to determine if a
 // diagnostic message is warranted.
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/Transforms/Utils/MisExpect.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormatVariadic.h"
 #include <algorithm>
 #include <cstdint>
 #include <functional>
 #include <numeric>

 #define DEBUG_TYPE "misexpect"

 using namespace llvm;
 using namespace misexpect;

 // Command line option to enable/disable the warning when profile data suggests
 // a mismatch with the use of the llvm.expect intrinsic
 static cl::opt<bool> PGOWarnMisExpect(
     "pgo-warn-misexpect", cl::init(false), cl::Hidden,
     cl::desc("Use this option to turn on/off "
              "warnings about incorrect usage of llvm.expect intrinsics."));

 // Command line option for setting the diagnostic tolerance threshold
 static cl::opt<uint32_t> MisExpectTolerance(
     "misexpect-tolerance", cl::init(0),
     cl::desc("Prevents emitting diagnostics when profile counts are "
              "within N% of the threshold.."));

 static bool isMisExpectDiagEnabled(const LLVMContext &Ctx) {
   return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested();
 }

 static uint32_t getMisExpectTolerance(const LLVMContext &Ctx) {
   return std::max(static_cast<uint32_t>(MisExpectTolerance),
                   Ctx.getDiagnosticsMisExpectTolerance());
 }

 static const Instruction *getInstCondition(const Instruction *I) {
   assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
   const Instruction *Ret = nullptr;
   if (auto *B = dyn_cast<BranchInst>(I)) {
     Ret = dyn_cast<Instruction>(B->getCondition());
   }
   // TODO: Find a way to resolve condition location for switches
   // Using the condition of the switch seems to often resolve to an earlier
   // point in the program, i.e. the calculation of the switch condition, rather
   // than the switch's location in the source code. Thus, we should use the
   // instruction to get source code locations rather than the condition to
   // improve diagnostic output, such as the caret. If the same problem exists
   // for branch instructions, then we should remove this function and directly
   // use the instruction
   //
   else if (auto *S = dyn_cast<SwitchInst>(I)) {
     Ret = dyn_cast<Instruction>(S->getCondition());
   }
   return Ret ? Ret : I;
 }

 static void emitMisexpectDiagnostic(const Instruction *I, LLVMContext &Ctx,
                                     uint64_t ProfCount, uint64_t TotalCount) {
   double PercentageCorrect = (double)ProfCount / TotalCount;
   auto PerString =
       formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
   auto RemStr = formatv(
       "Potential performance regression from use of the llvm.expect intrinsic: "
       "Annotation was correct on {0} of profiled executions.",
       PerString);
   const Instruction *Cond = getInstCondition(I);
   if (isMisExpectDiagEnabled(Ctx))
     Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Twine(PerString)));
   OptimizationRemarkEmitter ORE(I->getParent()->getParent());
   ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
 }

 void misexpect::verifyMisExpect(const Instruction &I,
                                 ArrayRef<uint32_t> RealWeights,
                                 ArrayRef<uint32_t> ExpectedWeights) {
   // To determine if we emit a diagnostic, we need to compare the branch weights
   // from the profile to those added by the llvm.expect intrinsic.
   // So first, we extract the "likely" and "unlikely" weights from
   // ExpectedWeights And determine the correct weight in the profile to compare
   // against.
   uint64_t LikelyBranchWeight = 0,
            UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
   size_t MaxIndex = 0;
   for (const auto &[Idx, V] : enumerate(ExpectedWeights)) {
     if (LikelyBranchWeight < V) {
       LikelyBranchWeight = V;
       MaxIndex = Idx;
     }
     if (UnlikelyBranchWeight > V)
       UnlikelyBranchWeight = V;
   }

   const uint64_t ProfiledWeight = RealWeights[MaxIndex];
   const uint64_t RealWeightsTotal =
       std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
                       std::plus<uint64_t>());
   const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;

   uint64_t TotalBranchWeight =
       LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);

   // Failing this assert means that we have corrupted metadata.
   assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) &&
          "TotalBranchWeight is less than the Likely branch weight");

   // To determine our threshold value we need to obtain the branch probability
   // for the weights added by llvm.expect and use that proportion to calculate
   // our threshold based on the collected profile data.
   auto LikelyProbablilty = BranchProbability::getBranchProbability(
       LikelyBranchWeight, TotalBranchWeight);

   uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);

   // clamp tolerance range to [0, 100)
   uint32_t Tolerance = getMisExpectTolerance(I.getContext());
   Tolerance = std::clamp(Tolerance, 0u, 99u);

   // Allow users to relax checking by N%  i.e., if they use a 5% tolerance,
   // then we check against 0.95*ScaledThreshold
   if (Tolerance > 0)
     ScaledThreshold *= (1.0 - Tolerance / 100.0);

   // When the profile weight is below the threshold, we emit the diagnostic
   if (ProfiledWeight < ScaledThreshold)
     emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
                             RealWeightsTotal);
 }

 void misexpect::checkBackendInstrumentation(const Instruction &I,
                                             ArrayRef<uint32_t> RealWeights) {
   // Backend checking assumes any existing weight comes from an `llvm.expect`
   // intrinsic. However, SampleProfiling + ThinLTO add branch weights  multiple
   // times, leading to an invalid assumption in our checking. Backend checks
   // should only operate on branch weights that carry the "!expected" field,
   // since they are guaranteed to be added by the LowerExpectIntrinsic pass.
   if (!hasBranchWeightOrigin(I))
     return;
   SmallVector<uint32_t> ExpectedWeights;
   if (!extractBranchWeights(I, ExpectedWeights))
     return;
   verifyMisExpect(I, RealWeights, ExpectedWeights);
 }

 void misexpect::checkFrontendInstrumentation(
     const Instruction &I, ArrayRef<uint32_t> ExpectedWeights) {
   SmallVector<uint32_t> RealWeights;
   if (!extractBranchWeights(I, RealWeights))
     return;
   verifyMisExpect(I, RealWeights, ExpectedWeights);
 }

 void misexpect::checkExpectAnnotations(const Instruction &I,
                                        ArrayRef<uint32_t> ExistingWeights,
                                        bool IsFrontend) {
   if (IsFrontend)
     checkFrontendInstrumentation(I, ExistingWeights);
   else
     checkBackendInstrumentation(I, ExistingWeights);
 }
	//===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This contains code to emit warnings for potentially incorrect usage of the
	// llvm.expect intrinsic. This utility extracts the threshold values from
	// metadata associated with the instrumented Branch or Switch instruction. The
	// threshold values are then used to determine if a warning should be emmited.
	//
	// MisExpect's implementation relies on two assumptions about how branch weights
	// are managed in LLVM.
	//
	// 1) Frontend profiling weights are always in place before llvm.expect is
	// lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore
	// needs to extract the branch weights and then compare them to the weights
	// being added by the llvm.expect intrinsic lowering.
	//
	// 2) Sampling and IR based profiles will only have branch weight metadata
	// before profiling data is consulted if they are from a lowered llvm.expect
	// intrinsic. These profiles thus always extract the expected weights and then
	// compare them to the weights collected during profiling to determine if a
	// diagnostic message is warranted.
	//
	//===----------------------------------------------------------------------===//

	#include "llvm/Transforms/Utils/MisExpect.h"
	#include "llvm/ADT/Twine.h"
	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
	#include "llvm/IR/DiagnosticInfo.h"
	#include "llvm/IR/Instruction.h"
	#include "llvm/IR/Instructions.h"
	#include "llvm/IR/LLVMContext.h"
	#include "llvm/IR/ProfDataUtils.h"
	#include "llvm/Support/BranchProbability.h"
	#include "llvm/Support/CommandLine.h"
	#include "llvm/Support/FormatVariadic.h"
	#include <algorithm>
	#include <cstdint>
	#include <functional>
	#include <numeric>

	#define DEBUG_TYPE "misexpect"

	using namespace llvm;
	using namespace misexpect;

	// Command line option to enable/disable the warning when profile data suggests
	// a mismatch with the use of the llvm.expect intrinsic
	static cl::opt<bool> PGOWarnMisExpect(
	"pgo-warn-misexpect", cl::init(false), cl::Hidden,
	cl::desc("Use this option to turn on/off "
	"warnings about incorrect usage of llvm.expect intrinsics."));

	// Command line option for setting the diagnostic tolerance threshold
	static cl::opt<uint32_t> MisExpectTolerance(
	"misexpect-tolerance", cl::init(0),
	cl::desc("Prevents emitting diagnostics when profile counts are "
	"within N% of the threshold.."));

	static bool isMisExpectDiagEnabled(const LLVMContext &Ctx) {
	return PGOWarnMisExpect \|\| Ctx.getMisExpectWarningRequested();
	}

	static uint32_t getMisExpectTolerance(const LLVMContext &Ctx) {
	return std::max(static_cast<uint32_t>(MisExpectTolerance),
	Ctx.getDiagnosticsMisExpectTolerance());
	}

	static const Instruction getInstCondition(const Instruction I) {
	assert(I != nullptr && "MisExpect target Instruction cannot be nullptr");
	const Instruction *Ret = nullptr;
	if (auto *B = dyn_cast<BranchInst>(I)) {
	Ret = dyn_cast<Instruction>(B->getCondition());
	}
	// TODO: Find a way to resolve condition location for switches
	// Using the condition of the switch seems to often resolve to an earlier
	// point in the program, i.e. the calculation of the switch condition, rather
	// than the switch's location in the source code. Thus, we should use the
	// instruction to get source code locations rather than the condition to
	// improve diagnostic output, such as the caret. If the same problem exists
	// for branch instructions, then we should remove this function and directly
	// use the instruction
	//
	else if (auto *S = dyn_cast<SwitchInst>(I)) {
	Ret = dyn_cast<Instruction>(S->getCondition());
	}
	return Ret ? Ret : I;
	}

	static void emitMisexpectDiagnostic(const Instruction *I, LLVMContext &Ctx,
	uint64_t ProfCount, uint64_t TotalCount) {
	double PercentageCorrect = (double)ProfCount / TotalCount;
	auto PerString =
	formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount);
	auto RemStr = formatv(
	"Potential performance regression from use of the llvm.expect intrinsic: "
	"Annotation was correct on {0} of profiled executions.",
	PerString);
	const Instruction *Cond = getInstCondition(I);
	if (isMisExpectDiagEnabled(Ctx))
	Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Twine(PerString)));
	OptimizationRemarkEmitter ORE(I->getParent()->getParent());
	ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str());
	}

	void misexpect::verifyMisExpect(const Instruction &I,
	ArrayRef<uint32_t> RealWeights,
	ArrayRef<uint32_t> ExpectedWeights) {
	// To determine if we emit a diagnostic, we need to compare the branch weights
	// from the profile to those added by the llvm.expect intrinsic.
	// So first, we extract the "likely" and "unlikely" weights from
	// ExpectedWeights And determine the correct weight in the profile to compare
	// against.
	uint64_t LikelyBranchWeight = 0,
	UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max();
	size_t MaxIndex = 0;
	for (const auto &[Idx, V] : enumerate(ExpectedWeights)) {
	if (LikelyBranchWeight < V) {
	LikelyBranchWeight = V;
	MaxIndex = Idx;
	}
	if (UnlikelyBranchWeight > V)
	UnlikelyBranchWeight = V;
	}

	const uint64_t ProfiledWeight = RealWeights[MaxIndex];
	const uint64_t RealWeightsTotal =
	std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0,
	std::plus<uint64_t>());
	const uint64_t NumUnlikelyTargets = RealWeights.size() - 1;

	uint64_t TotalBranchWeight =
	LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);

	// Failing this assert means that we have corrupted metadata.
	assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) &&
	"TotalBranchWeight is less than the Likely branch weight");

	// To determine our threshold value we need to obtain the branch probability
	// for the weights added by llvm.expect and use that proportion to calculate
	// our threshold based on the collected profile data.
	auto LikelyProbablilty = BranchProbability::getBranchProbability(
	LikelyBranchWeight, TotalBranchWeight);

	uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal);

	// clamp tolerance range to [0, 100)
	uint32_t Tolerance = getMisExpectTolerance(I.getContext());
	Tolerance = std::clamp(Tolerance, 0u, 99u);

	// Allow users to relax checking by N% i.e., if they use a 5% tolerance,
	// then we check against 0.95*ScaledThreshold
	if (Tolerance > 0)
	ScaledThreshold *= (1.0 - Tolerance / 100.0);

	// When the profile weight is below the threshold, we emit the diagnostic
	if (ProfiledWeight < ScaledThreshold)
	emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight,
	RealWeightsTotal);
	}

	void misexpect::checkBackendInstrumentation(const Instruction &I,
	ArrayRef<uint32_t> RealWeights) {
	// Backend checking assumes any existing weight comes from an `llvm.expect`
	// intrinsic. However, SampleProfiling + ThinLTO add branch weights multiple
	// times, leading to an invalid assumption in our checking. Backend checks
	// should only operate on branch weights that carry the "!expected" field,
	// since they are guaranteed to be added by the LowerExpectIntrinsic pass.
	if (!hasBranchWeightOrigin(I))
	return;
	SmallVector<uint32_t> ExpectedWeights;
	if (!extractBranchWeights(I, ExpectedWeights))
	return;
	verifyMisExpect(I, RealWeights, ExpectedWeights);
	}

	void misexpect::checkFrontendInstrumentation(
	const Instruction &I, ArrayRef<uint32_t> ExpectedWeights) {
	SmallVector<uint32_t> RealWeights;
	if (!extractBranchWeights(I, RealWeights))
	return;
	verifyMisExpect(I, RealWeights, ExpectedWeights);
	}

	void misexpect::checkExpectAnnotations(const Instruction &I,
	ArrayRef<uint32_t> ExistingWeights,
	bool IsFrontend) {
	if (IsFrontend)
	checkFrontendInstrumentation(I, ExistingWeights);
	else
	checkBackendInstrumentation(I, ExistingWeights);
	}