| //===- bolt/Passes/PAuthGadgetScanner.cpp ---------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a pass that analyzes code hardened using Pointer |
| // Authentication and looks for non-protected or insufficiently protected parts. |
| // While the existing implementation only applies to AArch64, it is intended |
| // to keep this file reasonably target-neutral, and place AArch64-specific |
| // hooks in AArch64MCPlusBuilder. |
| // |
| // Various gadget kinds (patterns of unsafe instruction usage) can be detected. |
| // Gadgets of the particular kind are detected by inspecting the susceptible |
| // instructions (such as "all return instructions" or "all indirect branches |
| // and calls") and validating properties of their operands. This is achieved |
| // by first running a dataflow analysis on the entire function to compute the |
| // properties of registers before or after each instruction is executed. Then, |
| // each instruction together with the computed state is passed to a number of |
| // gadget detectors, which consume the results of this particular analysis. |
| // If CFG information is not available for a particular function, a simplified |
| // analysis is run instead of a dataflow analysis. |
| // |
| // There are two broad groups of gadget detectors: |
| // * Those analyzing the input operands of the instructions. They consume |
| // SrcState holding properties of the registers prior to execution of the |
| // instruction. SrcState is computed by iterating forwards over the |
| // instructions, by DataflowSrcSafetyAnalysis class. If BOLT was unable to |
| // reconstruct the CFG for a particular function, CFGUnawareSrcSafetyAnalysis |
| // class is used instead. |
| // * Those analyzing the output operands of the instructions. They mirror the |
| // former group by consuming DstState corresponding to the state *after* |
| // execution of the instruction. Such state is computed by iterating |
| // *backwards* over the instructions by DataflowDstSafetyAnalysis or its |
| // CFG-unaware counterpart. |
| // |
| // Furthermore, when producing a diagnostic for a found gadget, this tool tries |
| // to provide the clues on which instructions made the operands unsafe (such as |
| // the set of last instructions that wrote an unsafe value to the register |
| // along various possible paths of execution leading to this instruction). |
| // This is achieved by re-running the same analysis for the second time to |
| // collect the detailed information to improve the reports produced on the |
| // first run. Since it is expected that most of the functions do not have any |
| // issues to be reported, the second analysis run which is more time- and |
| // memory-consuming is skipped for most functions. Please note that unlike |
| // the reports themselves, these clues are provided on a best-effort basis. |
| // |
// Hierarchy of the analysis classes:
//
//   SrcSafetyAnalysis                               DstSafetyAnalysis
// (computes `SrcState`s)                          (computes `DstState`s)
//     |      |                                           |      |
//     |      |             DataflowAnalysis              |      |
//     |      |            (provided by BOLT)             |      |
//     |      |               |           |               |      |
//     |      v               v           v               v      |
//     |  DataflowSrcSafetyAnalysis   DataflowDstSafetyAnalysis  |
//     |                                                         |
//     |                                                         |
//     |                   CFGUnawareAnalysis                    |
//     |               (implemented in this file)                |
//     |                    |               |                    |
//     v                    v               v                    v
//   CFGUnawareSrcSafetyAnalysis         CFGUnawareDstSafetyAnalysis
//
// Detector functions:
//
//   shouldReportReturnGadget                  shouldReportAuthOracle
//   shouldReportCallGadget
//   ...
//
// Dispatched by (member functions of FunctionAnalysisContext):
//
//   findUnsafeUses                            findUnsafeDefs
//   handleSimpleReports                       handleSimpleReports
//   augmentUnsafeUseReports                   augmentUnsafeDefReports
//
| //===----------------------------------------------------------------------===// |
| |
| #include "bolt/Passes/PAuthGadgetScanner.h" |
| #include "bolt/Core/ParallelUtilities.h" |
| #include "bolt/Passes/DataflowAnalysis.h" |
| #include "bolt/Utils/CommandLineOpts.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/MC/MCInst.h" |
| #include "llvm/Support/Format.h" |
| #include <memory> |
| |
| #define DEBUG_TYPE "bolt-pauth-scanner" |
| |
| namespace llvm { |
| namespace bolt { |
| namespace PAuthGadgetScanner { |
| |
| static cl::opt<bool> AuthTrapsOnFailure( |
| "auth-traps-on-failure", |
| cl::desc("Assume authentication instructions always trap on failure"), |
| cl::cat(opts::BinaryAnalysisCategory)); |
| |
| [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label, |
| const MCInst &MI) { |
| dbgs() << " " << Label << ": "; |
| BC.printInstruction(dbgs(), MI); |
| } |
| |
| [[maybe_unused]] static void traceReg(const BinaryContext &BC, StringRef Label, |
| MCPhysReg Reg) { |
| dbgs() << " " << Label << ": "; |
| if (Reg == BC.MIB->getNoRegister()) |
| dbgs() << "(none)"; |
| else |
| dbgs() << BC.MRI->getName(Reg); |
| dbgs() << "\n"; |
| } |
| |
| [[maybe_unused]] static void traceRegMask(const BinaryContext &BC, |
| StringRef Label, BitVector Mask) { |
| dbgs() << " " << Label << ": "; |
| RegStatePrinter(BC).print(dbgs(), Mask); |
| dbgs() << "\n"; |
| } |
| |
| // Iterates over BinaryFunction's instructions like a range-based for loop: |
| // |
| // iterateOverInstrs(BF, [&](MCInstReference Inst) { |
| // // loop body |
| // }); |
| template <typename T> static void iterateOverInstrs(BinaryFunction &BF, T Fn) { |
| if (BF.hasCFG()) { |
| for (BinaryBasicBlock &BB : BF) |
| for (int64_t I = 0, E = BB.size(); I < E; ++I) |
| Fn(MCInstReference(BB, I)); |
| } else { |
| for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) |
| Fn(MCInstReference(BF, I)); |
| } |
| } |
| |
| // This class represents mapping from a set of arbitrary physical registers to |
| // consecutive array indexes. |
| class TrackedRegisters { |
| static constexpr uint16_t NoIndex = -1; |
| const std::vector<MCPhysReg> Registers; |
| std::vector<uint16_t> RegToIndexMapping; |
| |
| static size_t getMappingSize(ArrayRef<MCPhysReg> RegsToTrack) { |
| if (RegsToTrack.empty()) |
| return 0; |
| return 1 + *llvm::max_element(RegsToTrack); |
| } |
| |
| public: |
| TrackedRegisters(ArrayRef<MCPhysReg> RegsToTrack) |
| : Registers(RegsToTrack), |
| RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) { |
| for (auto [MappedIndex, Reg] : llvm::enumerate(RegsToTrack)) |
| RegToIndexMapping[Reg] = MappedIndex; |
| } |
| |
| ArrayRef<MCPhysReg> getRegisters() const { return Registers; } |
| |
| size_t getNumRegisters() const { return Registers.size(); } |
| |
| bool empty() const { return Registers.empty(); } |
| |
| bool isTracked(MCPhysReg Reg) const { |
| bool IsTracked = (unsigned)Reg < RegToIndexMapping.size() && |
| RegToIndexMapping[Reg] != NoIndex; |
| assert(IsTracked == llvm::is_contained(Registers, Reg)); |
| return IsTracked; |
| } |
| |
| unsigned getIndex(MCPhysReg Reg) const { |
| assert(isTracked(Reg) && "Register is not tracked"); |
| return RegToIndexMapping[Reg]; |
| } |
| }; |
| |
| typedef SmallPtrSet<const MCInst *, 4> SetOfRelatedInsts; |
| |
| /// A state representing which registers are safe to use by an instruction |
| /// at a given program point. |
| /// |
| /// To simplify reasoning, let's stick with the following approach: |
| /// * when state is updated by the dataflow analysis, the sub-, super- and |
| /// overlapping registers are marked as needed |
| /// * when the particular instruction is checked if it represents a gadget, |
| /// the specific bit of BitVector should be usable to answer this. |
| /// |
| /// For example, on AArch64: |
| /// * An AUTIZA X0 instruction marks both X0 and W0 (as well as W0_HI) as |
| /// safe-to-dereference. It does not change the state of X0_X1, for example, |
| /// as super-registers partially retain their old, unsafe values. |
| /// * LDR X1, [X0] marks as unsafe both X1 itself and anything it overlaps |
| /// with: W1, W1_HI, X0_X1 and so on. |
| /// * RET (which is implicitly RET X30) is a protected return if and only if |
| /// X30 is safe-to-dereference - the state computed for sub- and |
| /// super-registers is not inspected. |
| struct SrcState { |
| /// A BitVector containing the registers that are either authenticated or |
| /// whose value is known not to be attacker-controlled under Pointer |
| /// Authentication threat model. If AuthTrapsOnFailure is false, a failed |
| /// authentication is permitted to produce an invalid address that generates |
| /// an error on memory access. The registers in this set are either |
| /// * not clobbered since being authenticated, or |
| /// * trusted at function entry and were not clobbered yet, or |
| /// * contain a safely materialized address. |
| /// |
| /// Safe-to-dereference registers are considered to be safe to use by the |
| /// instructions that perform memory access and generate an error on failed |
| /// address translation. These registers are not generally safe to be used |
| /// by the instructions like pointer signing, as such usage may hide the |
| /// authentication failure. |
| BitVector SafeToDerefRegs; |
| /// A BitVector containing the registers that are either authenticated |
| /// *successfully* or whose value is known not to be attacker-controlled |
| /// under Pointer Authentication threat model. |
| /// The registers in this set are either |
| /// * authenticated and then checked to be authenticated successfully |
| /// (and not clobbered since then), or |
| /// * trusted at function entry and were not clobbered yet, or |
| /// * contain a safely materialized address. |
| /// |
| /// When authentication instructions are assumed to always trap on error, |
| /// this is identical to SafeToDerefRegs. |
| BitVector TrustedRegs; |
| /// A vector of sets, only used on the second analysis run. |
| /// Each element in the vector represents one of the registers for which we |
| /// track the set of last instructions that wrote to this register, excluding |
| /// authentications. This is intended to provide best-effort clues on which |
| /// instruction caused the particular register not to be safe-to-dereference. |
| /// |
| /// Please note that the mapping from MCPhysReg values to indexes in this |
| /// vector is provided by RegsToTrack field of SrcSafetyAnalysis. |
| std::vector<SetOfRelatedInsts> LastInstWritingReg; |
| |
| /// Constructs an empty state (no registers at all). |
| SrcState() {} |
| |
| /// Constructs a new state with all registers marked unsafe. |
| SrcState(unsigned NumRegs, unsigned NumRegsToTrack) |
| : SafeToDerefRegs(NumRegs), TrustedRegs(NumRegs), |
| LastInstWritingReg(NumRegsToTrack) {} |
| |
| /// Updates *this to account for the state incoming from a predecessor basic |
| /// block (i.e. computes the least safe states among *this and StateIn). |
| SrcState &merge(const SrcState &StateIn) { |
| if (StateIn.empty()) |
| return *this; |
| if (empty()) |
| return (*this = StateIn); |
| |
| SafeToDerefRegs &= StateIn.SafeToDerefRegs; |
| TrustedRegs &= StateIn.TrustedRegs; |
| for (auto [ThisSet, OtherSet] : |
| llvm::zip_equal(LastInstWritingReg, StateIn.LastInstWritingReg)) |
| ThisSet.insert_range(OtherSet); |
| return *this; |
| } |
| |
| /// Returns true if this object does not store state of any registers - |
| /// neither safe, nor unsafe ones. |
| bool empty() const { return SafeToDerefRegs.empty(); } |
| |
| bool operator==(const SrcState &RHS) const { |
| return SafeToDerefRegs == RHS.SafeToDerefRegs && |
| TrustedRegs == RHS.TrustedRegs && |
| LastInstWritingReg == RHS.LastInstWritingReg; |
| } |
| bool operator!=(const SrcState &RHS) const { return !((*this) == RHS); } |
| }; |
| |
| static void printInstsShort(raw_ostream &OS, |
| ArrayRef<SetOfRelatedInsts> Insts) { |
| OS << "Insts: "; |
| for (auto [I, PtrSet] : llvm::enumerate(Insts)) { |
| OS << "[" << I << "]("; |
| interleave(PtrSet, OS, " "); |
| OS << ")"; |
| } |
| } |
| |
| static raw_ostream &operator<<(raw_ostream &OS, const SrcState &S) { |
| OS << "src-state<"; |
| if (S.empty()) { |
| OS << "empty"; |
| } else { |
| OS << "SafeToDerefRegs: " << S.SafeToDerefRegs << ", "; |
| OS << "TrustedRegs: " << S.TrustedRegs << ", "; |
| printInstsShort(OS, S.LastInstWritingReg); |
| } |
| OS << ">"; |
| return OS; |
| } |
| |
| class SrcStatePrinter { |
| public: |
| void print(raw_ostream &OS, const SrcState &State) const; |
| explicit SrcStatePrinter(const BinaryContext &BC) : BC(BC) {} |
| |
| private: |
| const BinaryContext &BC; |
| }; |
| |
| void SrcStatePrinter::print(raw_ostream &OS, const SrcState &S) const { |
| RegStatePrinter RegStatePrinter(BC); |
| OS << "src-state<"; |
| if (S.empty()) { |
| assert(S.SafeToDerefRegs.empty()); |
| assert(S.TrustedRegs.empty()); |
| assert(S.LastInstWritingReg.empty()); |
| OS << "empty"; |
| } else { |
| OS << "SafeToDerefRegs: "; |
| RegStatePrinter.print(OS, S.SafeToDerefRegs); |
| OS << ", TrustedRegs: "; |
| RegStatePrinter.print(OS, S.TrustedRegs); |
| OS << ", "; |
| printInstsShort(OS, S.LastInstWritingReg); |
| } |
| OS << ">"; |
| } |
| |
| /// Computes which registers are safe to be used by control flow and signing |
| /// instructions. |
| /// |
| /// This is the base class for two implementations: a dataflow-based analysis |
| /// which is intended to be used for most functions and a simplified CFG-unaware |
| /// version for functions without reconstructed CFG. |
| class SrcSafetyAnalysis { |
| public: |
| SrcSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrack) |
| : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()), |
| RegsToTrack(RegsToTrack) {} |
| |
| virtual ~SrcSafetyAnalysis() {} |
| |
| static std::shared_ptr<SrcSafetyAnalysis> |
| create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack); |
| |
| virtual void run() = 0; |
| virtual const SrcState &getStateBefore(const MCInst &Inst) const = 0; |
| |
| protected: |
| BinaryContext &BC; |
| const unsigned NumRegs; |
| |
| /// The set of registers for which the dataflow analysis must compute the set |
| /// of last writing instructions. |
| const TrackedRegisters RegsToTrack; |
| |
| /// Stores information about the detected instruction sequences emitted to |
| /// check an authenticated pointer. Specifically, if such sequence is detected |
| /// in a basic block, it maps the last instruction of that basic block to |
| /// (CheckedRegister, FirstInstOfTheSequence) pair, see the description of |
| /// MCPlusBuilder::getAuthCheckedReg(BB) method. |
| /// |
| /// As the detection of such sequences requires iterating over the adjacent |
| /// instructions, it should be done before calling computeNext(), which |
| /// operates on separate instructions. |
| DenseMap<const MCInst *, std::pair<MCPhysReg, const MCInst *>> |
| CheckerSequenceInfo; |
| |
| SetOfRelatedInsts &lastWritingInsts(SrcState &S, MCPhysReg Reg) const { |
| unsigned Index = RegsToTrack.getIndex(Reg); |
| return S.LastInstWritingReg[Index]; |
| } |
| const SetOfRelatedInsts &lastWritingInsts(const SrcState &S, |
| MCPhysReg Reg) const { |
| unsigned Index = RegsToTrack.getIndex(Reg); |
| return S.LastInstWritingReg[Index]; |
| } |
| |
| /// Computes SrcState observed on function entry. |
| SrcState createEntryState() { |
| SrcState S(NumRegs, RegsToTrack.getNumRegisters()); |
| for (MCPhysReg Reg : BC.MIB->getTrustedLiveInRegs()) |
| S.TrustedRegs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true); |
| S.SafeToDerefRegs = S.TrustedRegs; |
| return S; |
| } |
| |
| /// Computes a reasonably pessimistic estimation of the register state when |
| /// the previous instruction is not known for sure. Takes the set of registers |
| /// which are trusted at function entry and removes all registers that can be |
| /// clobbered inside this function. |
| SrcState computePessimisticState(BinaryFunction &BF) { |
| BitVector ClobberedRegs(NumRegs); |
| iterateOverInstrs(BF, [&](MCInstReference Inst) { |
| BC.MIB->getClobberedRegs(Inst, ClobberedRegs); |
| |
| // If this is a call instruction, no register is safe anymore, unless |
| // it is a tail call. Ignore tail calls for the purpose of estimating the |
| // worst-case scenario, assuming no instructions are executed in the |
| // caller after this point anyway. |
| if (BC.MIB->isCall(Inst) && !BC.MIB->isTailCall(Inst)) |
| ClobberedRegs.set(); |
| }); |
| |
| SrcState S = createEntryState(); |
| S.SafeToDerefRegs.reset(ClobberedRegs); |
| S.TrustedRegs.reset(ClobberedRegs); |
| return S; |
| } |
| |
| BitVector getClobberedRegs(const MCInst &Point) const { |
| BitVector Clobbered(NumRegs); |
| // Assume a call can clobber all registers, including callee-saved |
| // registers. There's a good chance that callee-saved registers will be |
| // saved on the stack at some point during execution of the callee. |
| // Therefore they should also be considered as potentially modified by an |
| // attacker/written to. |
| if (BC.MIB->isCall(Point)) |
| Clobbered.set(); |
| else |
| BC.MIB->getClobberedRegs(Point, Clobbered); |
| return Clobbered; |
| } |
| |
| std::optional<MCPhysReg> getRegMadeTrustedByChecking(const MCInst &Inst, |
| SrcState Cur) const { |
| // This function cannot return multiple registers. This is never the case |
| // on AArch64. |
| std::optional<MCPhysReg> RegCheckedByInst = |
| BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/false); |
| if (RegCheckedByInst && Cur.SafeToDerefRegs[*RegCheckedByInst]) |
| return *RegCheckedByInst; |
| |
| auto It = CheckerSequenceInfo.find(&Inst); |
| if (It == CheckerSequenceInfo.end()) |
| return std::nullopt; |
| |
| MCPhysReg RegCheckedBySequence = It->second.first; |
| const MCInst *FirstCheckerInst = It->second.second; |
| |
| // FirstCheckerInst should belong to the same basic block (see the |
| // assertion in DataflowSrcSafetyAnalysis::run()), meaning it was |
| // deterministically processed a few steps before this instruction. |
| const SrcState &StateBeforeChecker = getStateBefore(*FirstCheckerInst); |
| |
| // The sequence checks the register, but it should be authenticated before. |
| if (!StateBeforeChecker.SafeToDerefRegs[RegCheckedBySequence]) |
| return std::nullopt; |
| |
| return RegCheckedBySequence; |
| } |
| |
| // Returns all registers that can be treated as if they are written by an |
| // authentication instruction. |
| SmallVector<MCPhysReg> getRegsMadeSafeToDeref(const MCInst &Point, |
| const SrcState &Cur) const { |
| SmallVector<MCPhysReg> Regs; |
| |
| // A signed pointer can be authenticated, ... |
| bool Dummy = false; |
| if (auto AutReg = BC.MIB->getWrittenAuthenticatedReg(Point, Dummy)) |
| Regs.push_back(*AutReg); |
| |
| // ... or a safe address can be materialized, ... |
| if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point)) |
| Regs.push_back(*NewAddrReg); |
| |
| // ... or an address can be updated in a safe manner, producing the result |
| // which is as trusted as the input address. |
| if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) { |
| auto [DstReg, SrcReg] = *DstAndSrc; |
| if (Cur.SafeToDerefRegs[SrcReg]) |
| Regs.push_back(DstReg); |
| } |
| |
| // Make sure explicit checker sequence keeps register safe-to-dereference |
| // when the register would be clobbered according to the regular rules: |
| // |
| // ; LR is safe to dereference here |
| // mov x16, x30 ; start of the sequence, LR is s-t-d right before |
| // xpaclri ; clobbers LR, LR is not safe anymore |
| // cmp x30, x16 |
| // b.eq 1f ; end of the sequence: LR is marked as trusted |
| // brk 0xc470 |
| // 1: |
| // ; at this point LR would be marked as trusted, |
| // ; but not safe-to-dereference |
| // |
| // or even just |
| // |
| // ; X1 is safe to dereference here |
| // ldr x0, [x1, #8]! |
| // ; X1 is trusted here, but it was clobbered due to address write-back |
| if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur)) |
| Regs.push_back(*CheckedReg); |
| |
| return Regs; |
| } |
| |
| // Returns all registers made trusted by this instruction. |
| SmallVector<MCPhysReg> getRegsMadeTrusted(const MCInst &Point, |
| const SrcState &Cur) const { |
| assert(!AuthTrapsOnFailure && "Use getRegsMadeSafeToDeref instead"); |
| SmallVector<MCPhysReg> Regs; |
| |
| // An authenticated pointer can be checked, ... |
| if (auto CheckedReg = getRegMadeTrustedByChecking(Point, Cur)) |
| Regs.push_back(*CheckedReg); |
| |
| // ... or a pointer can be authenticated by an instruction that always |
| // checks the pointer, ... |
| bool IsChecked = false; |
| std::optional<MCPhysReg> AutReg = |
| BC.MIB->getWrittenAuthenticatedReg(Point, IsChecked); |
| if (AutReg && IsChecked) |
| Regs.push_back(*AutReg); |
| |
| // ... or a safe address can be materialized, ... |
| if (auto NewAddrReg = BC.MIB->getMaterializedAddressRegForPtrAuth(Point)) |
| Regs.push_back(*NewAddrReg); |
| |
| // ... or an address can be updated in a safe manner, producing the result |
| // which is as trusted as the input address. |
| if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Point)) { |
| auto [DstReg, SrcReg] = *DstAndSrc; |
| if (Cur.TrustedRegs[SrcReg]) |
| Regs.push_back(DstReg); |
| } |
| |
| return Regs; |
| } |
| |
| SrcState computeNext(const MCInst &Point, const SrcState &Cur) { |
| if (BC.MIB->isCFI(Point)) |
| return Cur; |
| |
| SrcStatePrinter P(BC); |
| LLVM_DEBUG({ |
| dbgs() << " SrcSafetyAnalysis::ComputeNext("; |
| BC.InstPrinter->printInst(&Point, 0, "", *BC.STI, dbgs()); |
| dbgs() << ", "; |
| P.print(dbgs(), Cur); |
| dbgs() << ")\n"; |
| }); |
| |
| // Skip this instruction until a non-empty state is propagated here. |
| // When performing a dataflow analysis, it is technically possible that |
| // Cur is always empty at a given program point - then just keep it empty. |
| // For details, see DataflowSrcSafetyAnalysis::getStartingStateAtBB() and |
| // FunctionAnalysis::findUnsafeUses(). |
| if (Cur.empty()) { |
| LLVM_DEBUG( |
| { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; }); |
| return SrcState(); |
| } |
| |
| // First, compute various properties of the instruction, taking the state |
| // before its execution into account, if necessary. |
| |
| BitVector Clobbered = getClobberedRegs(Point); |
| SmallVector<MCPhysReg> NewSafeToDerefRegs = |
| getRegsMadeSafeToDeref(Point, Cur); |
| // If authentication instructions trap on failure, safe-to-dereference |
| // registers are always trusted. |
| SmallVector<MCPhysReg> NewTrustedRegs = |
| AuthTrapsOnFailure ? NewSafeToDerefRegs |
| : getRegsMadeTrusted(Point, Cur); |
| |
| // Then, compute the state after this instruction is executed. |
| SrcState Next = Cur; |
| |
| Next.SafeToDerefRegs.reset(Clobbered); |
| Next.TrustedRegs.reset(Clobbered); |
| // Keep track of this instruction if it writes to any of the registers we |
| // need to track that for: |
| for (MCPhysReg Reg : RegsToTrack.getRegisters()) |
| if (Clobbered[Reg]) |
| lastWritingInsts(Next, Reg) = {&Point}; |
| |
| // After accounting for clobbered registers in general, override the state |
| // according to authentication and other *special cases* of clobbering. |
| |
| // The sub-registers are also safe-to-dereference now, but not their |
| // super-registers (as they retain untrusted register units). |
| BitVector NewSafeSubregs(NumRegs); |
| for (MCPhysReg SafeReg : NewSafeToDerefRegs) |
| NewSafeSubregs |= BC.MIB->getAliases(SafeReg, /*OnlySmaller=*/true); |
| for (MCPhysReg Reg : NewSafeSubregs.set_bits()) { |
| Next.SafeToDerefRegs.set(Reg); |
| if (RegsToTrack.isTracked(Reg)) |
| lastWritingInsts(Next, Reg).clear(); |
| } |
| |
| // Process new trusted registers. |
| for (MCPhysReg TrustedReg : NewTrustedRegs) |
| Next.TrustedRegs |= BC.MIB->getAliases(TrustedReg, /*OnlySmaller=*/true); |
| |
| LLVM_DEBUG({ |
| dbgs() << " .. result: ("; |
| P.print(dbgs(), Next); |
| dbgs() << ")\n"; |
| }); |
| |
| // Being trusted is a strictly stronger property than being |
| // safe-to-dereference. |
| assert(Next.TrustedRegs.subsetOf(Next.SafeToDerefRegs) && |
| "SafeToDerefRegs should contain all TrustedRegs"); |
| |
| return Next; |
| } |
| |
| public: |
| std::vector<MCInstReference> |
| getLastClobberingInsts(const MCInst &Inst, BinaryFunction &BF, |
| MCPhysReg ClobberedReg) const { |
| const SrcState &S = getStateBefore(Inst); |
| |
| std::vector<MCInstReference> Result; |
| for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) |
| Result.push_back(MCInstReference::get(*Inst, BF)); |
| return Result; |
| } |
| }; |
| |
| class DataflowSrcSafetyAnalysis |
| : public SrcSafetyAnalysis, |
| public DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, |
| /*Backward=*/false, SrcStatePrinter> { |
| using DFParent = DataflowAnalysis<DataflowSrcSafetyAnalysis, SrcState, false, |
| SrcStatePrinter>; |
| friend DFParent; |
| |
| using SrcSafetyAnalysis::BC; |
| using SrcSafetyAnalysis::computeNext; |
| |
| // Pessimistic initial state for basic blocks without any predecessors |
| // (not needed for most functions, thus initialized lazily). |
| SrcState PessimisticState; |
| |
| public: |
| DataflowSrcSafetyAnalysis(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack) |
| : SrcSafetyAnalysis(BF, RegsToTrack), DFParent(BF, AllocId) {} |
| |
| const SrcState &getStateBefore(const MCInst &Inst) const override { |
| return DFParent::getStateBefore(Inst).get(); |
| } |
| |
| void run() override { |
| for (BinaryBasicBlock &BB : Func) { |
| if (auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB)) { |
| MCPhysReg CheckedReg = CheckerInfo->first; |
| MCInst &FirstInst = *CheckerInfo->second; |
| MCInst &LastInst = *BB.getLastNonPseudoInstr(); |
| LLVM_DEBUG({ |
| dbgs() << "Found pointer checking sequence in " << BB.getName() |
| << ":\n"; |
| traceReg(BC, "Checked register", CheckedReg); |
| traceInst(BC, "First instruction", FirstInst); |
| traceInst(BC, "Last instruction", LastInst); |
| }); |
| (void)CheckedReg; |
| (void)FirstInst; |
| assert(llvm::any_of(BB, [&](MCInst &I) { return &I == &FirstInst; }) && |
| "Dataflow analysis expects the checker not to cross BBs"); |
| CheckerSequenceInfo[&LastInst] = *CheckerInfo; |
| } |
| } |
| DFParent::run(); |
| } |
| |
| protected: |
| void preflight() {} |
| |
| SrcState getStartingStateAtBB(const BinaryBasicBlock &BB) { |
| if (BB.isEntryPoint()) |
| return createEntryState(); |
| |
| // If a basic block without any predecessors is found in an optimized code, |
| // this likely means that some CFG edges were not detected. Pessimistically |
| // assume any register that can ever be clobbered in this function to be |
| // unsafe before this basic block. |
| // Warn about this fact in FunctionAnalysis::findUnsafeUses(), as it likely |
| // means imprecise CFG information. |
| if (BB.pred_empty()) { |
| if (PessimisticState.empty()) |
| PessimisticState = computePessimisticState(*BB.getParent()); |
| return PessimisticState; |
| } |
| |
| return SrcState(); |
| } |
| |
| SrcState getStartingStateAtPoint(const MCInst &Point) { return SrcState(); } |
| |
| void doConfluence(SrcState &StateOut, const SrcState &StateIn) { |
| SrcStatePrinter P(BC); |
| LLVM_DEBUG({ |
| dbgs() << " DataflowSrcSafetyAnalysis::Confluence(\n"; |
| dbgs() << " State 1: "; |
| P.print(dbgs(), StateOut); |
| dbgs() << "\n"; |
| dbgs() << " State 2: "; |
| P.print(dbgs(), StateIn); |
| dbgs() << ")\n"; |
| }); |
| |
| StateOut.merge(StateIn); |
| |
| LLVM_DEBUG({ |
| dbgs() << " merged state: "; |
| P.print(dbgs(), StateOut); |
| dbgs() << "\n"; |
| }); |
| } |
| |
| StringRef getAnnotationName() const { return "DataflowSrcSafetyAnalysis"; } |
| }; |
| |
| /// A helper base class for implementing a simplified counterpart of a dataflow |
| /// analysis for functions without CFG information. |
| template <typename StateTy> class CFGUnawareAnalysis { |
| BinaryContext &BC; |
| BinaryFunction &BF; |
| MCPlusBuilder::AllocatorIdTy AllocId; |
| unsigned StateAnnotationIndex; |
| |
| void cleanStateAnnotations() { |
| for (auto &I : BF.instrs()) |
| BC.MIB->removeAnnotation(I.second, StateAnnotationIndex); |
| } |
| |
| protected: |
| CFGUnawareAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, |
| StringRef AnnotationName) |
| : BC(BF.getBinaryContext()), BF(BF), AllocId(AllocId) { |
| StateAnnotationIndex = BC.MIB->getOrCreateAnnotationIndex(AnnotationName); |
| } |
| |
| void setState(MCInst &Inst, const StateTy &S) { |
| // Check if we need to remove an old annotation (this is the case if |
| // this is the second, detailed run of the analysis). |
| if (BC.MIB->hasAnnotation(Inst, StateAnnotationIndex)) |
| BC.MIB->removeAnnotation(Inst, StateAnnotationIndex); |
| // Attach the state. |
| BC.MIB->addAnnotation(Inst, StateAnnotationIndex, S, AllocId); |
| } |
| |
| const StateTy &getState(const MCInst &Inst) const { |
| return BC.MIB->getAnnotationAs<StateTy>(Inst, StateAnnotationIndex); |
| } |
| |
| virtual ~CFGUnawareAnalysis() { cleanStateAnnotations(); } |
| }; |
| |
| // A simplified implementation of DataflowSrcSafetyAnalysis for functions |
| // lacking CFG information. |
| // |
| // Let assume the instructions can only be executed linearly unless there is |
| // a label to jump to - this should handle both directly jumping to a location |
| // encoded as an immediate operand of a branch instruction, as well as saving a |
| // branch destination somewhere and passing it to an indirect branch instruction |
| // later, provided no arithmetic is performed on the destination address: |
| // |
| // ; good: the destination is directly encoded into the branch instruction |
| // cbz x0, some_label |
| // |
| // ; good: the branch destination is first stored and then used as-is |
| // adr x1, some_label |
| // br x1 |
| // |
| // ; bad: some clever arithmetic is performed manually |
| // adr x1, some_label |
| // add x1, x1, #4 |
| // br x1 |
| // ... |
| // some_label: |
| // ; pessimistically reset the state as we are unsure where we came from |
| // ... |
| // ret |
| // JTI0: |
| // .byte some_label - Ltmp0 ; computing offsets using labels may probably |
| // be detected too, provided enough information |
| // is retained by the assembler and linker |
| // |
| // Then, a function can be split into a number of disjoint contiguous sequences |
| // of instructions without labels in between. These sequences can be processed |
| // the same way basic blocks are processed by dataflow analysis, with the same |
| // pessimistic estimation of the initial state at the start of each sequence |
| // (except the first instruction of the function). |
| class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis, |
| public CFGUnawareAnalysis<SrcState> { |
| using SrcSafetyAnalysis::BC; |
| BinaryFunction &BF; |
| |
| public: |
| CFGUnawareSrcSafetyAnalysis(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack) |
| : SrcSafetyAnalysis(BF, RegsToTrack), |
| CFGUnawareAnalysis(BF, AllocId, "CFGUnawareSrcSafetyAnalysis"), BF(BF) { |
| } |
| |
| /// Visits the instructions in the order BF.instrs() yields them and attaches |
| /// to every non-CFI instruction the state holding right before it executes. |
| void run() override { |
| const SrcState PessimisticState = computePessimisticState(BF); |
| SrcState Current = createEntryState(); |
| for (auto &OffsetAndInst : BF.instrs()) { |
| MCInst &Inst = OffsetAndInst.second; |
| if (BC.MIB->isCFI(Inst)) |
| continue; |
| |
| // An instruction preceded by a label may be a jump target, so nothing |
| // can be assumed about the state it is entered with. The very first |
| // instruction is exempt, as at least one label is expected there. |
| if (BF.hasLabelAt(OffsetAndInst.first)) { |
| const MCInst *FunctionStart = &BF.instrs().begin()->second; |
| if (&Inst != FunctionStart) { |
| LLVM_DEBUG({ |
| traceInst(BC, "Due to label, resetting the state before", Inst); |
| }); |
| Current = PessimisticState; |
| } |
| } |
| |
| // Record the state *before* Inst ... |
| setState(Inst, Current); |
| |
| // ... and advance to the state *after* it. |
| Current = computeNext(Inst, Current); |
| } |
| } |
| |
| /// Returns the state attached to \p Inst by run(). |
| const SrcState &getStateBefore(const MCInst &Inst) const override { |
| return getState(Inst); |
| } |
| }; |
| |
| /// Instantiates the analysis flavor matching what is known about \p BF. |
| std::shared_ptr<SrcSafetyAnalysis> |
| SrcSafetyAnalysis::create(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack) { |
| // Without CFG information, fall back to the CFG-unaware implementation. |
| if (!BF.hasCFG()) |
| return std::make_shared<CFGUnawareSrcSafetyAnalysis>(BF, AllocId, |
| RegsToTrack); |
| |
| return std::make_shared<DataflowSrcSafetyAnalysis>(BF, AllocId, |
| RegsToTrack); |
| } |
| |
| /// A state representing which registers are safe to be used as the destination |
| /// operand of an authentication instruction. |
| /// |
| /// Similar to SrcState, it is the responsibility of the analysis to take |
| /// register aliasing into account. |
| /// |
| /// Depending on the implementation (such as whether FEAT_FPAC is implemented |
| /// by an AArch64 CPU or not), it may be possible that an authentication |
| /// instruction returns an invalid pointer on failure instead of terminating |
| /// the program immediately (assuming the program will crash as soon as that |
| /// pointer is dereferenced). Since few bits are usually allocated for the PAC |
| /// field (such as less than 16 bits on a typical AArch64 system), an attacker |
| /// can try every possible signature and guess the correct one if there is a |
| /// gadget that tells whether the particular pointer has a correct signature |
| /// (a so called "authentication oracle"). For that reason, it should be |
| /// impossible for an attacker to test if a pointer is correctly signed - |
| /// either the program should be terminated on authentication failure or |
| /// the result of authentication should not be accessible to an attacker. |
| /// |
| /// Considering the instructions in forward order as they are executed, a |
| /// restricted set of operations can be allowed on any register containing a |
| /// value derived from the result of an authentication instruction until that |
| /// value is checked not to contain the result of a failed authentication. |
| /// In DstSafetyAnalysis, these rules are adapted, so that the safety property |
| /// for a register is computed by iterating the instructions in backward order. |
| /// Then the resulting properties are used at authentication instruction sites |
| /// to check output registers and report the particular instruction if it writes |
| /// to an unsafe register. |
| /// |
| /// Another approach would be to simulate the above rules as-is, iterating over |
| /// the instructions in forward direction. To make it possible to report the |
| /// particular instructions as oracles, this would probably require tracking |
| /// references to these instructions for each register currently containing |
| /// sensitive data. |
| /// |
| /// In DstSafetyAnalysis, the source register Xn of an instruction Inst is safe |
| /// if at least one of the following is true: |
| /// * Inst checks if Xn contains the result of a successful authentication and |
| /// terminates the program on failure. Note that Inst can either naturally |
| /// dereference Xn (load, branch, return, etc. instructions) or be the first |
| /// instruction of an explicit checking sequence. |
| /// * Inst performs safe address arithmetic AND both source and result |
| /// registers, as well as any temporary registers, must be safe after |
| /// execution of Inst (temporaries are not used on AArch64 and thus not |
| /// currently supported/allowed). |
| /// See MCPlusBuilder::analyzeAddressArithmeticsForPtrAuth for the details. |
| /// * Inst fully overwrites Xn with a constant. |
| struct DstState { |
| /// Registers whose values an attacker cannot inspect in a way usable as an |
| /// authentication oracle. Authentication instructions should only write |
| /// their results to registers from this set. |
| BitVector CannotEscapeUnchecked; |
| |
| /// One set per tracked register, only used on the second analysis run. |
| /// Each set holds the first instructions that leak the authenticated |
| /// pointer before it was checked, as a clue on which instruction made the |
| /// particular register unsafe. |
| /// |
| /// The mapping from MCPhysReg values to indexes in this vector is provided |
| /// by the RegsToTrack field of DstSafetyAnalysis. |
| std::vector<SetOfRelatedInsts> FirstInstLeakingReg; |
| |
| /// Creates an empty state, i.e. one holding no registers at all. |
| DstState() = default; |
| |
| /// Creates a state in which every register is marked unsafe. |
| DstState(unsigned NumRegs, unsigned NumRegsToTrack) |
| : CannotEscapeUnchecked(NumRegs), FirstInstLeakingReg(NumRegsToTrack) {} |
| |
| /// Folds the state observed in a successor basic block into *this, keeping |
| /// the least safe combination of the two. |
| DstState &merge(const DstState &StateIn) { |
| // An empty incoming state contributes nothing. |
| if (StateIn.empty()) |
| return *this; |
| |
| // Nothing is stored here yet: take over the incoming state as-is. |
| if (empty()) { |
| *this = StateIn; |
| return *this; |
| } |
| |
| // A register remains safe only if it is safe in both states, while the |
| // leaking instructions are accumulated from both. |
| CannotEscapeUnchecked &= StateIn.CannotEscapeUnchecked; |
| for (auto [Mine, Theirs] : |
| llvm::zip_equal(FirstInstLeakingReg, StateIn.FirstInstLeakingReg)) |
| Mine.insert_range(Theirs); |
| return *this; |
| } |
| |
| /// Tells whether this object stores the state of no registers at all - |
| /// neither safe, nor unsafe ones. |
| bool empty() const { return CannotEscapeUnchecked.empty(); } |
| |
| bool operator==(const DstState &RHS) const { |
| if (!(CannotEscapeUnchecked == RHS.CannotEscapeUnchecked)) |
| return false; |
| return FirstInstLeakingReg == RHS.FirstInstLeakingReg; |
| } |
| bool operator!=(const DstState &RHS) const { return !(*this == RHS); } |
| }; |
| |
| /// Prints \p S using the generic stream representation of its members. |
| static raw_ostream &operator<<(raw_ostream &OS, const DstState &S) { |
| OS << "dst-state<"; |
| if (!S.empty()) { |
| OS << "CannotEscapeUnchecked: " << S.CannotEscapeUnchecked << ", "; |
| printInstsShort(OS, S.FirstInstLeakingReg); |
| } else { |
| OS << "empty"; |
| } |
| return OS << ">"; |
| } |
| |
| /// Printer for DstState objects; it keeps a reference to the BinaryContext |
| /// passed at construction for use by print(). |
| class DstStatePrinter { |
| const BinaryContext &BC; |
| |
| public: |
| explicit DstStatePrinter(const BinaryContext &BC) : BC(BC) {} |
| |
| /// Writes a textual representation of \p S to \p OS. |
| void print(raw_ostream &OS, const DstState &S) const; |
| }; |
| |
| void DstStatePrinter::print(raw_ostream &OS, const DstState &S) const { |
| RegStatePrinter RegPrinter(BC); |
| OS << "dst-state<"; |
| if (!S.empty()) { |
| OS << "CannotEscapeUnchecked: "; |
| RegPrinter.print(OS, S.CannotEscapeUnchecked); |
| OS << ", "; |
| printInstsShort(OS, S.FirstInstLeakingReg); |
| } else { |
| // An empty state is expected to carry no per-register data at all. |
| assert(S.CannotEscapeUnchecked.empty()); |
| assert(S.FirstInstLeakingReg.empty()); |
| OS << "empty"; |
| } |
| OS << ">"; |
| } |
| |
| /// Computes which registers are safe to be written to by auth instructions. |
| /// |
| /// This is the base class for two implementations: a dataflow-based analysis |
| /// which is intended to be used for most functions and a simplified CFG-unaware |
| /// version for functions without reconstructed CFG. |
| class DstSafetyAnalysis { |
| public: |
| /// Captures the BinaryContext of \p BF, the number of registers reported by |
| /// its MCRegisterInfo and the set of registers to collect details for. |
| DstSafetyAnalysis(BinaryFunction &BF, ArrayRef<MCPhysReg> RegsToTrack) |
| : BC(BF.getBinaryContext()), NumRegs(BC.MRI->getNumRegs()), |
| RegsToTrack(RegsToTrack) {} |
| |
| virtual ~DstSafetyAnalysis() = default; |
| |
| /// Creates the dataflow-based or the CFG-unaware implementation depending on |
| /// whether \p BF has a CFG. |
| static std::shared_ptr<DstSafetyAnalysis> |
| create(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack); |
| |
| /// Runs the analysis over the whole function. |
| virtual void run() = 0; |
| /// Returns the state computed for the point right after \p Inst. |
| virtual const DstState &getStateAfter(const MCInst &Inst) const = 0; |
| |
| protected: |
| BinaryContext &BC; |
| const unsigned NumRegs; |
| |
| /// The set of registers for which the analysis must compute the set of |
| /// first leaking instructions (see DstState::FirstInstLeakingReg). |
| const TrackedRegisters RegsToTrack; |
| |
| /// Stores information about the detected instruction sequences emitted to |
| /// check an authenticated pointer. Specifically, if such sequence is detected |
| /// in a basic block, it maps the first instruction of that sequence to the |
| /// register being checked. |
| /// |
| /// As the detection of such sequences requires iterating over the adjacent |
| /// instructions, it should be done before calling computeNext(), which |
| /// operates on separate instructions. |
| DenseMap<const MCInst *, MCPhysReg> RegCheckedAt; |
| |
| /// Returns the set of first leaking instructions stored in \p S for the |
| /// tracked register \p Reg. |
| SetOfRelatedInsts &firstLeakingInsts(DstState &S, MCPhysReg Reg) const { |
| unsigned Index = RegsToTrack.getIndex(Reg); |
| return S.FirstInstLeakingReg[Index]; |
| } |
| const SetOfRelatedInsts &firstLeakingInsts(const DstState &S, |
| MCPhysReg Reg) const { |
| unsigned Index = RegsToTrack.getIndex(Reg); |
| return S.FirstInstLeakingReg[Index]; |
| } |
| |
| /// Creates a state with all registers marked unsafe (not to be confused |
| /// with empty state). |
| DstState createUnsafeState() { |
| return DstState(NumRegs, RegsToTrack.getNumRegisters()); |
| } |
| |
| /// Returns the set of registers that can be leaked by this instruction. |
| /// A register is considered leaked if it has any intersection with any |
| /// register read by Inst. This is similar to how the set of clobbered |
| /// registers is computed, but taking input operands instead of outputs. |
| BitVector getLeakedRegs(const MCInst &Inst) const { |
| BitVector Leaked(NumRegs); |
| |
| // Assume a call can read all registers. |
| if (BC.MIB->isCall(Inst)) { |
| Leaked.set(); |
| return Leaked; |
| } |
| |
| // Compute the set of registers overlapping with any register used by |
| // this instruction. |
| |
| const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); |
| |
| for (MCPhysReg Reg : Desc.implicit_uses()) |
| Leaked |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/false); |
| |
| for (const MCOperand &Op : BC.MIB->useOperands(Inst)) { |
| if (Op.isReg()) |
| Leaked |= BC.MIB->getAliases(Op.getReg(), /*OnlySmaller=*/false); |
| } |
| |
| return Leaked; |
| } |
| |
| /// Returns the registers that \p Inst makes safe at the point before it |
| /// executes, given the state \p Cur after its execution. |
| /// |
| /// \p LeakedRegs is accepted for interface stability, but is not consulted |
| /// by the current implementation. |
| SmallVector<MCPhysReg> getRegsMadeProtected(const MCInst &Inst, |
| const BitVector &LeakedRegs, |
| const DstState &Cur) const { |
| SmallVector<MCPhysReg> Regs; |
| |
| // A pointer can be checked, ... |
| if (auto CheckedReg = |
| BC.MIB->getAuthCheckedReg(Inst, /*MayOverwrite=*/true)) |
| Regs.push_back(*CheckedReg); |
| // (a single lookup is enough to both test for and fetch the entry) |
| auto CheckedAtIt = RegCheckedAt.find(&Inst); |
| if (CheckedAtIt != RegCheckedAt.end()) |
| Regs.push_back(CheckedAtIt->second); |
| |
| // ... or it can be used as a branch target, ... |
| if (BC.MIB->isIndirectBranch(Inst) || BC.MIB->isIndirectCall(Inst)) { |
| // Initialized for consistency with the other call sites in this file, so |
| // that the flag never holds an indeterminate value. |
| bool IsAuthenticated = false; |
| MCPhysReg BranchDestReg = |
| BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticated); |
| assert(BranchDestReg != BC.MIB->getNoRegister()); |
| if (!IsAuthenticated) |
| Regs.push_back(BranchDestReg); |
| } |
| |
| // ... or it can be used as a return target, ... |
| if (BC.MIB->isReturn(Inst)) { |
| bool IsAuthenticated = false; |
| std::optional<MCPhysReg> RetReg = |
| BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticated); |
| if (RetReg && !IsAuthenticated) |
| Regs.push_back(*RetReg); |
| } |
| |
| // ... or an address can be updated in a safe manner, ... |
| if (auto DstAndSrc = BC.MIB->analyzeAddressArithmeticsForPtrAuth(Inst)) { |
| auto [DstReg, SrcReg] = *DstAndSrc; |
| // Note that *all* registers containing the derived values must be safe, |
| // both source and destination ones. No temporaries are supported for now. |
| if (Cur.CannotEscapeUnchecked[SrcReg] && |
| Cur.CannotEscapeUnchecked[DstReg]) |
| Regs.push_back(SrcReg); |
| } |
| |
| // ... or the register can be overwritten in whole with a constant: for that |
| // purpose, look for the instructions with no register inputs (neither |
| // explicit nor implicit ones) and no side effects (to rule out reading |
| // not modelled locations). |
| const MCInstrDesc &Desc = BC.MII->get(Inst.getOpcode()); |
| bool HasExplicitSrcRegs = |
| llvm::any_of(BC.MIB->useOperands(Inst), |
| [](const MCOperand &Op) { return Op.isReg(); }); |
| if (!Desc.hasUnmodeledSideEffects() && !HasExplicitSrcRegs && |
| Desc.implicit_uses().empty()) { |
| for (const MCOperand &Def : BC.MIB->defOperands(Inst)) |
| Regs.push_back(Def.getReg()); |
| } |
| |
| return Regs; |
| } |
| |
| /// Computes the state before \p Point executes from the state \p Cur |
| /// observed after it (the instructions are processed in backward order). |
| /// |
| /// CFI instructions leave the state unchanged, trap instructions yield a |
| /// state with every register marked safe, and an empty \p Cur is propagated |
| /// as an empty state. |
| DstState computeNext(const MCInst &Point, const DstState &Cur) { |
| if (BC.MIB->isCFI(Point)) |
| return Cur; |
| |
| DstStatePrinter P(BC); |
| LLVM_DEBUG({ |
| dbgs() << " DstSafetyAnalysis::ComputeNext("; |
| BC.InstPrinter->printInst(&Point, 0, "", *BC.STI, dbgs()); |
| dbgs() << ", "; |
| P.print(dbgs(), Cur); |
| dbgs() << ")\n"; |
| }); |
| |
| // If this instruction terminates the program immediately, no |
| // authentication oracles are possible past this point. |
| if (BC.MIB->isTrap(Point)) { |
| LLVM_DEBUG(traceInst(BC, "Trap instruction found", Point)); |
| DstState Next(NumRegs, RegsToTrack.getNumRegisters()); |
| Next.CannotEscapeUnchecked.set(); |
| return Next; |
| } |
| |
| // If this instruction is reachable by the analysis, a non-empty state will |
| // be propagated to it sooner or later. Until then, skip computeNext(). |
| if (Cur.empty()) { |
| LLVM_DEBUG( |
| { dbgs() << "Skipping computeNext(Point, Cur) as Cur is empty.\n"; }); |
| return DstState(); |
| } |
| |
| // First, compute various properties of the instruction, taking the state |
| // after its execution into account, if necessary. |
| |
| BitVector LeakedRegs = getLeakedRegs(Point); |
| SmallVector<MCPhysReg> NewProtectedRegs = |
| getRegsMadeProtected(Point, LeakedRegs, Cur); |
| |
| // Then, compute the state before this instruction is executed. |
| DstState Next = Cur; |
| |
| // Leaked registers become unsafe, with Point recorded as the (only) first |
| // leaking instruction of each tracked one. |
| Next.CannotEscapeUnchecked.reset(LeakedRegs); |
| for (MCPhysReg Reg : RegsToTrack.getRegisters()) { |
| if (LeakedRegs[Reg]) |
| firstLeakingInsts(Next, Reg) = {&Point}; |
| } |
| |
| // Protection is applied after leakage, so a register that is both read and |
| // made protected by Point ends up safe. |
| BitVector NewProtectedSubregs(NumRegs); |
| for (MCPhysReg Reg : NewProtectedRegs) |
| NewProtectedSubregs |= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true); |
| Next.CannotEscapeUnchecked |= NewProtectedSubregs; |
| for (MCPhysReg Reg : RegsToTrack.getRegisters()) { |
| if (NewProtectedSubregs[Reg]) |
| firstLeakingInsts(Next, Reg).clear(); |
| } |
| |
| LLVM_DEBUG({ |
| dbgs() << " .. result: ("; |
| P.print(dbgs(), Next); |
| dbgs() << ")\n"; |
| }); |
| |
| return Next; |
| } |
| |
| public: |
| /// Returns references to the instructions recorded as the first ones leaking |
| /// \p LeakedReg in the state after \p Inst. \p LeakedReg is looked up in |
| /// RegsToTrack, so it is presumably expected to be one of the tracked |
| /// registers. |
| std::vector<MCInstReference> getLeakingInsts(const MCInst &Inst, |
| BinaryFunction &BF, |
| MCPhysReg LeakedReg) const { |
| const DstState &S = getStateAfter(Inst); |
| |
| std::vector<MCInstReference> Result; |
| for (const MCInst *LeakingInst : firstLeakingInsts(S, LeakedReg)) |
| Result.push_back(MCInstReference::get(*LeakingInst, BF)); |
| return Result; |
| } |
| }; |
| |
| /// Dataflow-based implementation of DstSafetyAnalysis which iterates over the |
| /// instructions in backward direction. |
| class DataflowDstSafetyAnalysis |
| : public DstSafetyAnalysis, |
| public DataflowAnalysis<DataflowDstSafetyAnalysis, DstState, |
| /*Backward=*/true, DstStatePrinter> { |
| using DFParent = DataflowAnalysis<DataflowDstSafetyAnalysis, DstState, true, |
| DstStatePrinter>; |
| friend DFParent; |
| |
| using DstSafetyAnalysis::BC; |
| using DstSafetyAnalysis::computeNext; |
| |
| public: |
| DataflowDstSafetyAnalysis(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack) |
| : DstSafetyAnalysis(BF, RegsToTrack), DFParent(BF, AllocId) {} |
| |
| const DstState &getStateAfter(const MCInst &Inst) const override { |
| // The base class walks the instructions backwards, hence its notion of |
| // "before" corresponds to "after" in program order. |
| return DFParent::getStateBefore(Inst).get(); |
| } |
| |
| void run() override { |
| // DstSafetyAnalysis is only computed to detect authentication oracles, so |
| // running it is a waste of time when authentication instructions are known |
| // to always trap on failure. |
| assert(!AuthTrapsOnFailure && |
| "DstSafetyAnalysis is useless with faulting auth"); |
| |
| // Record the pointer-checking sequences up front, before the dataflow |
| // starts processing separate instructions. |
| for (BinaryBasicBlock &BB : Func) { |
| auto CheckerInfo = BC.MIB->getAuthCheckedReg(BB); |
| if (!CheckerInfo) |
| continue; |
| |
| LLVM_DEBUG({ |
| dbgs() << "Found pointer checking sequence in " << BB.getName() |
| << ":\n"; |
| traceReg(BC, "Checked register", CheckerInfo->first); |
| traceInst(BC, "First instruction", *CheckerInfo->second); |
| }); |
| RegCheckedAt[CheckerInfo->second] = CheckerInfo->first; |
| } |
| DFParent::run(); |
| } |
| |
| protected: |
| void preflight() {} |
| |
| DstState getStartingStateAtBB(const BinaryBasicBlock &BB) { |
| // A basic block with successors starts with an empty state rather than |
| // everything-is-unsafe, to give a chance for some meaningful state to be |
| // propagated to BB from an indirectly reachable "exit basic block" ending |
| // with a return or tail call instruction. |
| if (!BB.succ_empty()) |
| return DstState(); |
| |
| // A basic block without any successors can be pessimistically initialized |
| // to everything-is-unsafe: this naturally handles return, trap and tail |
| // call instructions and is harmless for internal indirect branch |
| // instructions, like computed gotos. |
| return createUnsafeState(); |
| } |
| |
| DstState getStartingStateAtPoint(const MCInst &Point) { return DstState(); } |
| |
| void doConfluence(DstState &StateOut, const DstState &StateIn) { |
| DstStatePrinter Printer(BC); |
| LLVM_DEBUG({ |
| dbgs() << " DataflowDstSafetyAnalysis::Confluence(\n"; |
| dbgs() << " State 1: "; |
| Printer.print(dbgs(), StateOut); |
| dbgs() << "\n"; |
| dbgs() << " State 2: "; |
| Printer.print(dbgs(), StateIn); |
| dbgs() << ")\n"; |
| }); |
| |
| StateOut.merge(StateIn); |
| |
| LLVM_DEBUG({ |
| dbgs() << " merged state: "; |
| Printer.print(dbgs(), StateOut); |
| dbgs() << "\n"; |
| }); |
| } |
| |
| StringRef getAnnotationName() const { return "DataflowDstSafetyAnalysis"; } |
| }; |
| |
| /// Simplified implementation of DstSafetyAnalysis that walks the instructions |
| /// of the function in reverse, without relying on a CFG. |
| class CFGUnawareDstSafetyAnalysis : public DstSafetyAnalysis, |
| public CFGUnawareAnalysis<DstState> { |
| using DstSafetyAnalysis::BC; |
| BinaryFunction &BF; |
| |
| public: |
| CFGUnawareDstSafetyAnalysis(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack) |
| : DstSafetyAnalysis(BF, RegsToTrack), |
| CFGUnawareAnalysis(BF, AllocId, "CFGUnawareDstSafetyAnalysis"), BF(BF) { |
| } |
| |
| void run() override { |
| DstState State = createUnsafeState(); |
| for (auto &OffsetAndInst : llvm::reverse(BF.instrs())) { |
| MCInst &Inst = OffsetAndInst.second; |
| if (BC.MIB->isCFI(Inst)) |
| continue; |
| |
| // After an instruction that can change the control flow, the next |
| // executed instruction is not necessarily the one processed on the |
| // previous iteration, so pessimistically start over before analyzing it. |
| const bool MayChangeControlFlow = BC.MIB->isCall(Inst) || |
| BC.MIB->isBranch(Inst) || |
| BC.MIB->isReturn(Inst); |
| if (MayChangeControlFlow) { |
| LLVM_DEBUG(traceInst(BC, "Control flow instruction", Inst)); |
| State = createUnsafeState(); |
| } |
| |
| // Record the state *after* Inst ... |
| setState(Inst, State); |
| |
| // ... and step back to the state *before* it. |
| State = computeNext(Inst, State); |
| } |
| } |
| |
| /// Returns the state attached to \p Inst by run(). |
| const DstState &getStateAfter(const MCInst &Inst) const override { |
| return getState(Inst); |
| } |
| }; |
| |
| /// Instantiates the analysis flavor matching what is known about \p BF. |
| std::shared_ptr<DstSafetyAnalysis> |
| DstSafetyAnalysis::create(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocId, |
| ArrayRef<MCPhysReg> RegsToTrack) { |
| // Without CFG information, fall back to the CFG-unaware implementation. |
| if (!BF.hasCFG()) |
| return std::make_shared<CFGUnawareDstSafetyAnalysis>(BF, AllocId, |
| RegsToTrack); |
| |
| return std::make_shared<DataflowDstSafetyAnalysis>(BF, AllocId, |
| RegsToTrack); |
| } |
| |
| // Builds a report wrapping a GenericDiagnostic and requesting no details. |
| // PartialReport<T> could be returned here for any T, but T is currently always |
| // MCPhysReg, even though it is an implementation detail. |
| static PartialReport<MCPhysReg> make_generic_report(MCInstReference Location, |
| StringRef Text) { |
| return PartialReport<MCPhysReg>( |
| std::make_shared<GenericDiagnostic>(Location, Text), std::nullopt); |
| } |
| |
| /// Builds a report wrapping a GadgetDiagnostic of the given \p Kind at |
| /// \p Location, with \p RequestedDetails attached to it. |
| template <typename T> |
| static PartialReport<T> make_gadget_report(const GadgetKind &Kind, |
| MCInstReference Location, |
| T RequestedDetails) { |
| return PartialReport<T>(std::make_shared<GadgetDiagnostic>(Kind, Location), |
| RequestedDetails); |
| } |
| |
| /// Checks a return instruction against the state \p S computed before it. |
| /// |
| /// Yields a generic warning if the return destination register cannot be |
| /// determined, a gadget report if that register is neither authenticated by |
| /// the instruction itself nor safe to dereference, and nothing otherwise |
| /// (including the case of \p Inst not being a return at all). |
| static std::optional<PartialReport<MCPhysReg>> |
| shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst, |
| const SrcState &S) { |
| static const GadgetKind RetKind("non-protected ret found"); |
| if (!BC.MIB->isReturn(Inst)) |
| return std::nullopt; |
| |
| bool IsAuthenticated = false; |
| const std::optional<MCPhysReg> RetReg = |
| BC.MIB->getRegUsedAsRetDest(Inst, IsAuthenticated); |
| if (!RetReg.has_value()) |
| return make_generic_report( |
| Inst, "Warning: pac-ret analysis could not analyze this return " |
| "instruction"); |
| |
| if (IsAuthenticated) |
| return std::nullopt; |
| |
| const MCPhysReg Reg = *RetReg; |
| LLVM_DEBUG({ |
| traceInst(BC, "Found RET inst", Inst); |
| traceReg(BC, "RetReg", Reg); |
| traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs); |
| }); |
| |
| if (!S.SafeToDerefRegs[Reg]) |
| return make_gadget_report(RetKind, Inst, Reg); |
| |
| return std::nullopt; |
| } |
| |
| /// Decides whether \p Inst should be checked as a tail call. |
| /// |
| /// BOLT already marks some of the branch instructions as tail calls; this |
| /// function additionally tries to detect less obvious cases, assuming false |
| /// positives are acceptable as long as there are not too many of them. |
| /// |
| /// Not every instruction classified as a tail call here is necessarily safe |
| /// to be treated as such by the code transformations performed by BOLT. The |
| /// intention is to spot some of actually missed tail calls (and likely a |
| /// number of unrelated indirect branch instructions) as long as this doesn't |
| /// increase the amount of false positive reports unacceptably. |
| static bool shouldAnalyzeTailCallInst(const BinaryContext &BC, |
| const BinaryFunction &BF, |
| const MCInstReference &Inst) { |
| // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ() |
| // (such as isBranch at the time of writing this comment) and some don't (such |
| // as isCall), so MCInstrDesc is queried explicitly where it is important. |
| const unsigned Opcode = Inst.getMCInst().getOpcode(); |
| const MCInstrDesc &Desc = BC.MII->get(Opcode); |
| |
| // A tail call is a branch, though not necessarily an indirect one. |
| if (!Desc.isBranch()) |
| return false; |
| |
| // Branches already marked as tail calls by BOLT are always analyzed. |
| if (BC.MIB->isTailCall(Inst)) |
| return true; |
| |
| // Also try the branches marked as "UNKNOWN CONTROL FLOW" - this is a |
| // simplified condition from BinaryContext::printInstruction. |
| const bool IsUnknownControlFlow = |
| BC.MIB->isIndirectBranch(Inst) && !BC.MIB->getJumpTable(Inst); |
| |
| return BF.hasCFG() && IsUnknownControlFlow; |
| } |
| |
| /// Reports the first register returned by getTrustedLiveInRegs() that is not |
| /// trusted in \p S right before a (likely) tail call instruction. Tail calls |
| /// performed by the ELF entry function are never reported. |
| static std::optional<PartialReport<MCPhysReg>> |
| shouldReportUnsafeTailCall(const BinaryContext &BC, const BinaryFunction &BF, |
| const MCInstReference &Inst, const SrcState &S) { |
| static const GadgetKind UntrustedLRKind( |
| "untrusted link register found before tail call"); |
| |
| if (!shouldAnalyzeTailCallInst(BC, BF, Inst)) |
| return std::nullopt; |
| |
| // The registers returned by getTrustedLiveInRegs() are not just a reasonable |
| // target-independent _approximation_ of "the LR": they are *exactly* the |
| // registers SrcSafetyAnalysis uses to initialize the set of trusted registers |
| // on function entry. |
| // Hence, what is verified here is that the precondition expected to be |
| // imposed by a function call instruction (hardcoded into the target-specific |
| // getTrustedLiveInRegs() function) is respected on tail calls as well. |
| SmallVector<MCPhysReg> RegsToCheck = BC.MIB->getTrustedLiveInRegs(); |
| LLVM_DEBUG({ |
| traceInst(BC, "Found tail call inst", Inst); |
| traceRegMask(BC, "Trusted regs", S.TrustedRegs); |
| }); |
| |
| // In musl on AArch64, the _start function sets LR to zero and calls the next |
| // stage initialization function at the end, something along these lines: |
| // |
| // _start: |
| // mov x30, #0 |
| // ; ... other initialization ... |
| // b _start_c ; performs "exit" system call at some point |
| // |
| // Every executable linked with such libc would trigger a false positive, |
| // thus tail calls performed by ELF entry function are ignored. |
| const bool IsInEntryFunction = |
| BC.StartFunctionAddress && |
| *BC.StartFunctionAddress == Inst.getFunction()->getAddress(); |
| if (IsInEntryFunction) { |
| LLVM_DEBUG(dbgs() << " Skipping tail call in ELF entry function.\n"); |
| return std::nullopt; |
| } |
| |
| // At most one report is returned per instruction - this is probably OK... |
| for (MCPhysReg Reg : RegsToCheck) { |
| if (S.TrustedRegs[Reg]) |
| continue; |
| return make_gadget_report(UntrustedLRKind, Inst, Reg); |
| } |
| |
| return std::nullopt; |
| } |
| |
| /// Reports an indirect call or indirect branch whose destination register is |
| /// neither authenticated by the instruction itself nor safe to dereference |
| /// according to the state \p S computed before it. |
| static std::optional<PartialReport<MCPhysReg>> |
| shouldReportCallGadget(const BinaryContext &BC, const MCInstReference &Inst, |
| const SrcState &S) { |
| static const GadgetKind CallKind("non-protected call found"); |
| const bool IsIndirectTransfer = |
| BC.MIB->isIndirectCall(Inst) || BC.MIB->isIndirectBranch(Inst); |
| if (!IsIndirectTransfer) |
| return std::nullopt; |
| |
| bool IsAuthenticated = false; |
| const MCPhysReg DestReg = |
| BC.MIB->getRegUsedAsIndirectBranchDest(Inst, IsAuthenticated); |
| if (IsAuthenticated) |
| return std::nullopt; |
| |
| assert(DestReg != BC.MIB->getNoRegister() && "Valid register expected"); |
| LLVM_DEBUG({ |
| traceInst(BC, "Found call inst", Inst); |
| traceReg(BC, "Call destination reg", DestReg); |
| traceRegMask(BC, "SafeToDerefRegs", S.SafeToDerefRegs); |
| }); |
| |
| if (!S.SafeToDerefRegs[DestReg]) |
| return make_gadget_report(CallKind, Inst, DestReg); |
| |
| return std::nullopt; |
| } |
| |
| /// Reports an instruction for which getSignedReg() returns a register that is |
| /// not trusted according to the state \p S computed before it. |
| static std::optional<PartialReport<MCPhysReg>> |
| shouldReportSigningOracle(const BinaryContext &BC, const MCInstReference &Inst, |
| const SrcState &S) { |
| static const GadgetKind SigningOracleKind("signing oracle found"); |
| |
| const std::optional<MCPhysReg> SignedReg = BC.MIB->getSignedReg(Inst); |
| if (!SignedReg.has_value()) |
| return std::nullopt; |
| |
| const MCPhysReg Reg = *SignedReg; |
| LLVM_DEBUG({ |
| traceInst(BC, "Found sign inst", Inst); |
| traceReg(BC, "Signed reg", Reg); |
| traceRegMask(BC, "TrustedRegs", S.TrustedRegs); |
| }); |
| |
| if (!S.TrustedRegs[Reg]) |
| return make_gadget_report(SigningOracleKind, Inst, Reg); |
| |
| return std::nullopt; |
| } |
| |
| /// Checks an authentication instruction against the state \p S computed |
| /// after it. |
| /// |
| /// Yields a generic warning if no state was computed, a gadget report if the |
| /// written register is not in S.CannotEscapeUnchecked, and nothing otherwise |
| /// (including when getWrittenAuthenticatedReg() returns no register or sets |
| /// its IsChecked output). |
| static std::optional<PartialReport<MCPhysReg>> |
| shouldReportAuthOracle(const BinaryContext &BC, const MCInstReference &Inst, |
| const DstState &S) { |
| static const GadgetKind AuthOracleKind("authentication oracle found"); |
| |
| bool IsChecked = false; |
| const std::optional<MCPhysReg> AuthReg = |
| BC.MIB->getWrittenAuthenticatedReg(Inst, IsChecked); |
| const bool NeedsInspection = AuthReg.has_value() && !IsChecked; |
| if (!NeedsInspection) |
| return std::nullopt; |
| |
| const MCPhysReg Reg = *AuthReg; |
| LLVM_DEBUG({ |
| traceInst(BC, "Found auth inst", Inst); |
| traceReg(BC, "Authenticated reg", Reg); |
| }); |
| |
| if (S.empty()) { |
| LLVM_DEBUG(dbgs() << " DstState is empty!\n"); |
| return make_generic_report( |
| Inst, "Warning: no state computed for an authentication instruction " |
| "(possibly unreachable)"); |
| } |
| |
| LLVM_DEBUG( |
| { traceRegMask(BC, "safe output registers", S.CannotEscapeUnchecked); }); |
| |
| if (!S.CannotEscapeUnchecked[Reg]) |
| return make_gadget_report(AuthOracleKind, Inst, Reg); |
| |
| return std::nullopt; |
| } |
| |
| /// Collects the distinct registers for which the given \p Reports request |
| /// additional details. Reports without RequestedDetails are skipped. |
| static SmallVector<MCPhysReg> |
| collectRegsToTrack(ArrayRef<PartialReport<MCPhysReg>> Reports) { |
| SmallSet<MCPhysReg, 4> RegsToTrack; |
| // Iterate by const reference: only one field is read, so there is no reason |
| // to copy each report. |
| for (const PartialReport<MCPhysReg> &Report : Reports) |
| if (Report.RequestedDetails) |
| RegsToTrack.insert(*Report.RequestedDetails); |
| |
| return SmallVector<MCPhysReg>(RegsToTrack.begin(), RegsToTrack.end()); |
| } |
| |
| /// Runs the source register safety analysis without register tracking and |
| /// appends the reports produced by the SrcState-based detectors to |
| /// \p Reports, preceded by at most one warning about unreachable code. |
| void FunctionAnalysisContext::findUnsafeUses( |
| SmallVector<PartialReport<MCPhysReg>> &Reports) { |
| auto Analysis = SrcSafetyAnalysis::create(BF, AllocatorId, {}); |
| LLVM_DEBUG(dbgs() << "Running src register safety analysis...\n"); |
| Analysis->run(); |
| LLVM_DEBUG({ |
| dbgs() << "After src register safety analysis:\n"; |
| BF.dump(); |
| }); |
| |
| bool UnreachableBBReported = false; |
| if (BF.hasCFG()) { |
| // Basic blocks being unreachable according to BOLT likely mean that the |
| // reconstructed CFG is imprecise, so warn about that (at most once per |
| // BinaryFunction). A basic block can be |
| // * reachable from an entry basic block - a hopefully correct non-empty |
| // state is propagated to it sooner or later. Under normal conditions all |
| // basic blocks are expected to belong to this category. |
| // * reachable from a "directly unreachable" BB (one that has no direct |
| // predecessors, and not because it is an entry BB) - *some* non-empty |
| // state is propagated to it sooner or later, as the initial state of |
| // directly unreachable basic blocks is a pessimistic approximation, see |
| // computePessimisticState() |
| // - a warning can be printed for the "directly unreachable" basic block |
| // * reachable neither from an entry nor from a "directly unreachable" BB |
| // (such as a BB in an isolated loop of basic blocks) - its final state is |
| // computed to be empty |
| // - a warning can be printed for this basic block |
| for (BinaryBasicBlock &BB : BF) { |
| MCInst *FirstInst = BB.getFirstNonPseudoInstr(); |
| // Empty basic blocks are skipped early for simplicity. |
| if (!FirstInst) |
| continue; |
| |
| const bool IsDirectlyUnreachable = BB.pred_empty() && !BB.isEntryPoint(); |
| const bool HasNoStateComputed = |
| Analysis->getStateBefore(*FirstInst).empty(); |
| const bool LooksUnreachable = IsDirectlyUnreachable || HasNoStateComputed; |
| if (!LooksUnreachable) |
| continue; |
| |
| // The report is arbitrarily attached to the first instruction of BB and |
| // is printed as "[message] in function [name], basic block ..., |
| // at address ..." when the issue is reported to the user. |
| Reports.push_back(make_generic_report( |
| MCInstReference(BB, *FirstInst), |
| "Warning: possibly imprecise CFG, the analysis quality may be " |
| "degraded in this function. According to BOLT, unreachable code is " |
| "found" /* in function [name]... */)); |
| UnreachableBBReported = true; |
| break; // One warning per function. |
| } |
| } |
| // FIXME: Warn the user about imprecise analysis when the function has no CFG |
| // information at all. |
| |
| // Feeds a single instruction, together with the state computed before it, |
| // to every applicable detector. |
| auto CheckInst = [&](MCInstReference Inst) { |
| if (BC.MIB->isCFI(Inst)) |
| return; |
| |
| const SrcState &StateBefore = Analysis->getStateBefore(Inst); |
| if (StateBefore.empty()) { |
| LLVM_DEBUG(traceInst(BC, "Instruction has no state, skipping", Inst)); |
| assert(UnreachableBBReported && "Should be reported at least once"); |
| (void)UnreachableBBReported; |
| return; |
| } |
| |
| if (auto Report = shouldReportReturnGadget(BC, Inst, StateBefore)) |
| Reports.push_back(*Report); |
| |
| // The remaining detectors are not part of the pac-ret-only mode. |
| if (PacRetGadgetsOnly) |
| return; |
| |
| if (auto Report = shouldReportUnsafeTailCall(BC, BF, Inst, StateBefore)) |
| Reports.push_back(*Report); |
| |
| if (auto Report = shouldReportCallGadget(BC, Inst, StateBefore)) |
| Reports.push_back(*Report); |
| if (auto Report = shouldReportSigningOracle(BC, Inst, StateBefore)) |
| Reports.push_back(*Report); |
| }; |
| iterateOverInstrs(BF, CheckInst); |
| } |
| |
| /// Re-runs the source register safety analysis while tracking the registers |
| /// requested by \p Reports, and stores each report in Result.Diagnostics |
| /// together with the last instructions clobbering the requested register. |
| void FunctionAnalysisContext::augmentUnsafeUseReports( |
| ArrayRef<PartialReport<MCPhysReg>> Reports) { |
| // The second run of the analysis has register tracking enabled. |
| SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports); |
| auto Analysis = SrcSafetyAnalysis::create(BF, AllocatorId, RegsToTrack); |
| LLVM_DEBUG(dbgs() << "\nRunning detailed src register safety analysis...\n"); |
| Analysis->run(); |
| LLVM_DEBUG({ |
| dbgs() << "After detailed src register safety analysis:\n"; |
| BF.dump(); |
| }); |
| |
| // Attach the clobbering information to every gadget report. |
| for (const PartialReport<MCPhysReg> &Report : Reports) { |
| MCInstReference Location = Report.Issue->Location; |
| LLVM_DEBUG(traceInst(BC, "Attaching clobbering info to", Location)); |
| assert(Report.RequestedDetails && |
| "Should be removed by handleSimpleReports"); |
| auto Clobbering = |
| std::make_shared<ClobberingInfo>(Analysis->getLastClobberingInsts( |
| Location, BF, *Report.RequestedDetails)); |
| Result.Diagnostics.emplace_back(Report.Issue, Clobbering); |
| } |
| } |
| |
| void FunctionAnalysisContext::findUnsafeDefs( |
| SmallVector<PartialReport<MCPhysReg>> &Reports) { |
| if (PacRetGadgetsOnly) |
| return; |
| if (AuthTrapsOnFailure) |
| return; |
| |
| auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, {}); |
| LLVM_DEBUG(dbgs() << "Running dst register safety analysis...\n"); |
| Analysis->run(); |
| LLVM_DEBUG({ |
| dbgs() << "After dst register safety analysis:\n"; |
| BF.dump(); |
| }); |
| |
| iterateOverInstrs(BF, [&](MCInstReference Inst) { |
| if (BC.MIB->isCFI(Inst)) |
| return; |
| |
| const DstState &S = Analysis->getStateAfter(Inst); |
| |
| if (auto Report = shouldReportAuthOracle(BC, Inst, S)) |
| Reports.push_back(*Report); |
| }); |
| } |
| |
| void FunctionAnalysisContext::augmentUnsafeDefReports( |
| ArrayRef<PartialReport<MCPhysReg>> Reports) { |
| SmallVector<MCPhysReg> RegsToTrack = collectRegsToTrack(Reports); |
| // Re-compute the analysis with register tracking. |
| auto Analysis = DstSafetyAnalysis::create(BF, AllocatorId, RegsToTrack); |
| LLVM_DEBUG(dbgs() << "\nRunning detailed dst register safety analysis...\n"); |
| Analysis->run(); |
| LLVM_DEBUG({ |
| dbgs() << "After detailed dst register safety analysis:\n"; |
| BF.dump(); |
| }); |
| |
| // Augment gadget reports. |
| for (auto &Report : Reports) { |
| MCInstReference Location = Report.Issue->Location; |
| LLVM_DEBUG(traceInst(BC, "Attaching leakage info to", Location)); |
| assert(Report.RequestedDetails && |
| "Should be removed by handleSimpleReports"); |
| auto DetailedInfo = std::make_shared<LeakageInfo>( |
| Analysis->getLeakingInsts(Location, BF, *Report.RequestedDetails)); |
| Result.Diagnostics.emplace_back(Report.Issue, DetailedInfo); |
| } |
| } |
| |
| void FunctionAnalysisContext::handleSimpleReports( |
| SmallVector<PartialReport<MCPhysReg>> &Reports) { |
| // Before re-running the detailed analysis, process the reports which do not |
| // need any additional details to be attached. |
| for (auto &Report : Reports) { |
| if (!Report.RequestedDetails) |
| Result.Diagnostics.emplace_back(Report.Issue, nullptr); |
| } |
| llvm::erase_if(Reports, [](const auto &R) { return !R.RequestedDetails; }); |
| } |
| |
| void FunctionAnalysisContext::run() { |
| LLVM_DEBUG({ |
| dbgs() << "Analyzing function " << BF.getPrintName() |
| << ", AllocatorId = " << AllocatorId << "\n"; |
| BF.dump(); |
| }); |
| |
| SmallVector<PartialReport<MCPhysReg>> UnsafeUses; |
| findUnsafeUses(UnsafeUses); |
| handleSimpleReports(UnsafeUses); |
| if (!UnsafeUses.empty()) |
| augmentUnsafeUseReports(UnsafeUses); |
| |
| SmallVector<PartialReport<MCPhysReg>> UnsafeDefs; |
| findUnsafeDefs(UnsafeDefs); |
| handleSimpleReports(UnsafeDefs); |
| if (!UnsafeDefs.empty()) |
| augmentUnsafeDefReports(UnsafeDefs); |
| } |
| |
| void Analysis::runOnFunction(BinaryFunction &BF, |
| MCPlusBuilder::AllocatorIdTy AllocatorId) { |
| FunctionAnalysisContext FA(BF, AllocatorId, PacRetGadgetsOnly); |
| FA.run(); |
| |
| const FunctionAnalysisResult &FAR = FA.getResult(); |
| if (FAR.Diagnostics.empty()) |
| return; |
| |
| // `runOnFunction` is typically getting called from multiple threads in |
| // parallel. Therefore, use a lock to avoid data races when storing the |
| // result of the analysis in the `AnalysisResults` map. |
| { |
| std::lock_guard<std::mutex> Lock(AnalysisResultsMutex); |
| AnalysisResults[&BF] = FAR; |
| } |
| } |
| |
| static void printBB(const BinaryContext &BC, const BinaryBasicBlock &BB, |
| size_t StartIndex = 0, size_t EndIndex = -1) { |
| if (EndIndex == (size_t)-1) |
| EndIndex = BB.size() - 1; |
| const BinaryFunction *BF = BB.getFunction(); |
| for (unsigned I = StartIndex; I <= EndIndex; ++I) { |
| MCInstReference Inst(BB, I); |
| if (BC.MIB->isCFI(Inst)) |
| continue; |
| BC.printInstruction(outs(), Inst, Inst.computeAddress(), BF); |
| } |
| } |
| |
| static void reportFoundGadgetInSingleBBSingleRelatedInst( |
| raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst, |
| const MCInstReference Location) { |
| const BinaryBasicBlock *BB = Location.getBasicBlock(); |
| assert(RelatedInst.hasCFG()); |
| assert(Location.hasCFG()); |
| if (BB == RelatedInst.getBasicBlock()) { |
| OS << " This happens in the following basic block:\n"; |
| printBB(BC, *BB); |
| } |
| } |
| |
| void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC, |
| StringRef IssueKind) const { |
| const BinaryBasicBlock *BB = Location.getBasicBlock(); |
| const BinaryFunction *BF = Location.getFunction(); |
| const uint64_t Address = Location.computeAddress(); |
| |
| OS << "\nGS-PAUTH: " << IssueKind; |
| OS << " in function " << BF->getPrintName(); |
| if (BB) |
| OS << ", basic block " << BB->getName(); |
| OS << ", at address " << llvm::format("%x", Address) << "\n"; |
| OS << " The instruction is "; |
| BC.printInstruction(OS, Location, Address, BF); |
| } |
| |
| void GadgetDiagnostic::generateReport(raw_ostream &OS, |
| const BinaryContext &BC) const { |
| printBasicInfo(OS, BC, Kind.getDescription()); |
| } |
| |
| static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location, |
| ArrayRef<MCInstReference> RelatedInstrs) { |
| const BinaryFunction &BF = *Location.getFunction(); |
| const BinaryContext &BC = BF.getBinaryContext(); |
| |
| // Sort by address to ensure output is deterministic. |
| SmallVector<std::pair<uint64_t, MCInstReference>> RI; |
| for (auto &InstRef : RelatedInstrs) |
| RI.push_back(std::make_pair(InstRef.computeAddress(), InstRef)); |
| llvm::sort(RI, [](auto A, auto B) { return A.first < B.first; }); |
| |
| for (unsigned I = 0; I < RI.size(); ++I) { |
| auto [Address, InstRef] = RI[I]; |
| OS << " " << (I + 1) << ". "; |
| BC.printInstruction(OS, InstRef, Address, &BF); |
| }; |
| |
| if (RelatedInstrs.size() == 1) { |
| const MCInstReference RelatedInst = RelatedInstrs[0]; |
| // Printing the details is only implemented when CFG is available, |
| // not to overcomplicate the code, as most functions are expected to |
| // have CFG information. |
| if (RelatedInst.hasCFG()) |
| reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst, |
| Location); |
| } |
| } |
| |
| void ClobberingInfo::print(raw_ostream &OS, |
| const MCInstReference Location) const { |
| OS << " The " << ClobberingInstrs.size() |
| << " instructions that write to the affected registers after any " |
| "authentication are:\n"; |
| printRelatedInstrs(OS, Location, ClobberingInstrs); |
| } |
| |
| void LeakageInfo::print(raw_ostream &OS, const MCInstReference Location) const { |
| OS << " The " << LeakingInstrs.size() |
| << " instructions that leak the affected registers are:\n"; |
| printRelatedInstrs(OS, Location, LeakingInstrs); |
| } |
| |
| void GenericDiagnostic::generateReport(raw_ostream &OS, |
| const BinaryContext &BC) const { |
| printBasicInfo(OS, BC, Text); |
| } |
| |
| Error Analysis::runOnFunctions(BinaryContext &BC) { |
| ParallelUtilities::WorkFuncWithAllocTy WorkFun = |
| [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocatorId) { |
| runOnFunction(BF, AllocatorId); |
| }; |
| |
| ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
| return false; |
| }; |
| |
| ParallelUtilities::runOnEachFunctionWithUniqueAllocId( |
| BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, |
| SkipFunc, "PAuthGadgetScanner"); |
| |
| for (BinaryFunction *BF : BC.getAllBinaryFunctions()) { |
| if (!AnalysisResults.count(BF)) |
| continue; |
| for (const FinalReport &R : AnalysisResults[BF].Diagnostics) { |
| R.Issue->generateReport(outs(), BC); |
| if (R.Details) |
| R.Details->print(outs(), R.Issue->Location); |
| } |
| } |
| return Error::success(); |
| } |
| |
| } // namespace PAuthGadgetScanner |
| } // namespace bolt |
| } // namespace llvm |