|  | //===--- MisExpect.cpp - Check the use of llvm.expect with PGO data -------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This contains code to emit warnings for potentially incorrect usage of the | 
|  | // llvm.expect intrinsic. This utility extracts the threshold values from | 
|  | // metadata associated with the instrumented Branch or Switch instruction. The | 
|  | // threshold values are then used to determine if a warning should be emmited. | 
|  | // | 
|  | // MisExpect's implementation relies on two assumptions about how branch weights | 
|  | // are managed in LLVM. | 
|  | // | 
|  | // 1) Frontend profiling weights are always in place before llvm.expect is | 
|  | // lowered in LowerExpectIntrinsic.cpp. Frontend based instrumentation therefore | 
|  | // needs to extract the branch weights and then compare them to the weights | 
|  | // being added by the llvm.expect intrinsic lowering. | 
|  | // | 
|  | // 2) Sampling and IR based profiles will *only* have branch weight metadata | 
|  | // before profiling data is consulted if they are from a lowered llvm.expect | 
|  | // intrinsic. These profiles thus always extract the expected weights and then | 
|  | // compare them to the weights collected during profiling to determine if a | 
|  | // diagnostic message is warranted. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/Transforms/Utils/MisExpect.h" | 
|  | #include "llvm/ADT/Twine.h" | 
|  | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | 
|  | #include "llvm/IR/Constants.h" | 
|  | #include "llvm/IR/DiagnosticInfo.h" | 
|  | #include "llvm/IR/Instruction.h" | 
|  | #include "llvm/IR/Instructions.h" | 
|  | #include "llvm/IR/LLVMContext.h" | 
|  | #include "llvm/IR/ProfDataUtils.h" | 
|  | #include "llvm/Support/BranchProbability.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/FormatVariadic.h" | 
|  | #include <algorithm> | 
|  | #include <cstdint> | 
|  | #include <functional> | 
|  | #include <numeric> | 
|  |  | 
|  | #define DEBUG_TYPE "misexpect" | 
|  |  | 
|  | using namespace llvm; | 
|  | using namespace misexpect; | 
|  |  | 
|  | namespace llvm { | 
|  |  | 
|  | // Command line option to enable/disable the warning when profile data suggests | 
|  | // a mismatch with the use of the llvm.expect intrinsic | 
|  | static cl::opt<bool> PGOWarnMisExpect( | 
|  | "pgo-warn-misexpect", cl::init(false), cl::Hidden, | 
|  | cl::desc("Use this option to turn on/off " | 
|  | "warnings about incorrect usage of llvm.expect intrinsics.")); | 
|  |  | 
|  | static cl::opt<uint32_t> MisExpectTolerance( | 
|  | "misexpect-tolerance", cl::init(0), | 
|  | cl::desc("Prevents emiting diagnostics when profile counts are " | 
|  | "within N% of the threshold..")); | 
|  |  | 
|  | } // namespace llvm | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | bool isMisExpectDiagEnabled(LLVMContext &Ctx) { | 
|  | return PGOWarnMisExpect || Ctx.getMisExpectWarningRequested(); | 
|  | } | 
|  |  | 
|  | uint32_t getMisExpectTolerance(LLVMContext &Ctx) { | 
|  | return std::max(static_cast<uint32_t>(MisExpectTolerance), | 
|  | Ctx.getDiagnosticsMisExpectTolerance()); | 
|  | } | 
|  |  | 
|  | Instruction *getInstCondition(Instruction *I) { | 
|  | assert(I != nullptr && "MisExpect target Instruction cannot be nullptr"); | 
|  | Instruction *Ret = nullptr; | 
|  | if (auto *B = dyn_cast<BranchInst>(I)) { | 
|  | Ret = dyn_cast<Instruction>(B->getCondition()); | 
|  | } | 
|  | // TODO: Find a way to resolve condition location for switches | 
|  | // Using the condition of the switch seems to often resolve to an earlier | 
|  | // point in the program, i.e. the calculation of the switch condition, rather | 
|  | // than the switch's location in the source code. Thus, we should use the | 
|  | // instruction to get source code locations rather than the condition to | 
|  | // improve diagnostic output, such as the caret. If the same problem exists | 
|  | // for branch instructions, then we should remove this function and directly | 
|  | // use the instruction | 
|  | // | 
|  | else if (auto *S = dyn_cast<SwitchInst>(I)) { | 
|  | Ret = dyn_cast<Instruction>(S->getCondition()); | 
|  | } | 
|  | return Ret ? Ret : I; | 
|  | } | 
|  |  | 
|  | void emitMisexpectDiagnostic(Instruction *I, LLVMContext &Ctx, | 
|  | uint64_t ProfCount, uint64_t TotalCount) { | 
|  | double PercentageCorrect = (double)ProfCount / TotalCount; | 
|  | auto PerString = | 
|  | formatv("{0:P} ({1} / {2})", PercentageCorrect, ProfCount, TotalCount); | 
|  | auto RemStr = formatv( | 
|  | "Potential performance regression from use of the llvm.expect intrinsic: " | 
|  | "Annotation was correct on {0} of profiled executions.", | 
|  | PerString); | 
|  | Twine Msg(PerString); | 
|  | Instruction *Cond = getInstCondition(I); | 
|  | if (isMisExpectDiagEnabled(Ctx)) | 
|  | Ctx.diagnose(DiagnosticInfoMisExpect(Cond, Msg)); | 
|  | OptimizationRemarkEmitter ORE(I->getParent()->getParent()); | 
|  | ORE.emit(OptimizationRemark(DEBUG_TYPE, "misexpect", Cond) << RemStr.str()); | 
|  | } | 
|  |  | 
|  | } // namespace | 
|  |  | 
|  | namespace llvm { | 
|  | namespace misexpect { | 
|  |  | 
|  | void verifyMisExpect(Instruction &I, ArrayRef<uint32_t> RealWeights, | 
|  | ArrayRef<uint32_t> ExpectedWeights) { | 
|  | // To determine if we emit a diagnostic, we need to compare the branch weights | 
|  | // from the profile to those added by the llvm.expect intrinsic. | 
|  | // So first, we extract the "likely" and "unlikely" weights from | 
|  | // ExpectedWeights And determine the correct weight in the profile to compare | 
|  | // against. | 
|  | uint64_t LikelyBranchWeight = 0, | 
|  | UnlikelyBranchWeight = std::numeric_limits<uint32_t>::max(); | 
|  | size_t MaxIndex = 0; | 
|  | for (size_t Idx = 0, End = ExpectedWeights.size(); Idx < End; Idx++) { | 
|  | uint32_t V = ExpectedWeights[Idx]; | 
|  | if (LikelyBranchWeight < V) { | 
|  | LikelyBranchWeight = V; | 
|  | MaxIndex = Idx; | 
|  | } | 
|  | if (UnlikelyBranchWeight > V) { | 
|  | UnlikelyBranchWeight = V; | 
|  | } | 
|  | } | 
|  |  | 
|  | const uint64_t ProfiledWeight = RealWeights[MaxIndex]; | 
|  | const uint64_t RealWeightsTotal = | 
|  | std::accumulate(RealWeights.begin(), RealWeights.end(), (uint64_t)0, | 
|  | std::plus<uint64_t>()); | 
|  | const uint64_t NumUnlikelyTargets = RealWeights.size() - 1; | 
|  |  | 
|  | uint64_t TotalBranchWeight = | 
|  | LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets); | 
|  |  | 
|  | // FIXME: When we've addressed sample profiling, restore the assertion | 
|  | // | 
|  | // We cannot calculate branch probability if either of these invariants aren't | 
|  | // met. However, MisExpect diagnostics should not prevent code from compiling, | 
|  | // so we simply forgo emitting diagnostics here, and return early. | 
|  | // assert((TotalBranchWeight >= LikelyBranchWeight) && (TotalBranchWeight > 0) | 
|  | //              && "TotalBranchWeight is less than the Likely branch weight"); | 
|  | if ((TotalBranchWeight == 0) || (TotalBranchWeight <= LikelyBranchWeight)) | 
|  | return; | 
|  |  | 
|  | // To determine our threshold value we need to obtain the branch probability | 
|  | // for the weights added by llvm.expect and use that proportion to calculate | 
|  | // our threshold based on the collected profile data. | 
|  | auto LikelyProbablilty = BranchProbability::getBranchProbability( | 
|  | LikelyBranchWeight, TotalBranchWeight); | 
|  |  | 
|  | uint64_t ScaledThreshold = LikelyProbablilty.scale(RealWeightsTotal); | 
|  |  | 
|  | // clamp tolerance range to [0, 100) | 
|  | auto Tolerance = getMisExpectTolerance(I.getContext()); | 
|  | Tolerance = std::clamp(Tolerance, 0u, 99u); | 
|  |  | 
|  | // Allow users to relax checking by N%  i.e., if they use a 5% tolerance, | 
|  | // then we check against 0.95*ScaledThreshold | 
|  | if (Tolerance > 0) | 
|  | ScaledThreshold *= (1.0 - Tolerance / 100.0); | 
|  |  | 
|  | // When the profile weight is below the threshold, we emit the diagnostic | 
|  | if (ProfiledWeight < ScaledThreshold) | 
|  | emitMisexpectDiagnostic(&I, I.getContext(), ProfiledWeight, | 
|  | RealWeightsTotal); | 
|  | } | 
|  |  | 
|  | void checkBackendInstrumentation(Instruction &I, | 
|  | const ArrayRef<uint32_t> RealWeights) { | 
|  | SmallVector<uint32_t> ExpectedWeights; | 
|  | if (!extractBranchWeights(I, ExpectedWeights)) | 
|  | return; | 
|  | verifyMisExpect(I, RealWeights, ExpectedWeights); | 
|  | } | 
|  |  | 
|  | void checkFrontendInstrumentation(Instruction &I, | 
|  | const ArrayRef<uint32_t> ExpectedWeights) { | 
|  | SmallVector<uint32_t> RealWeights; | 
|  | if (!extractBranchWeights(I, RealWeights)) | 
|  | return; | 
|  | verifyMisExpect(I, RealWeights, ExpectedWeights); | 
|  | } | 
|  |  | 
|  | void checkExpectAnnotations(Instruction &I, | 
|  | const ArrayRef<uint32_t> ExistingWeights, | 
|  | bool IsFrontend) { | 
|  | if (IsFrontend) { | 
|  | checkFrontendInstrumentation(I, ExistingWeights); | 
|  | } else { | 
|  | checkBackendInstrumentation(I, ExistingWeights); | 
|  | } | 
|  | } | 
|  |  | 
|  | } // namespace misexpect | 
|  | } // namespace llvm | 
|  | #undef DEBUG_TYPE |