|  | //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
///
/// \file
/// This file contains the definition of the SarifDocumentWriter class, and
/// the serialization of its associated builders such as:
/// - \ref SarifArtifact
/// - \ref SarifArtifactLocation
/// - \ref SarifRule
/// - \ref SarifResult
//===----------------------------------------------------------------------===//
|  | #include "clang/Basic/Sarif.h" | 
|  | #include "clang/Basic/SourceLocation.h" | 
|  | #include "clang/Basic/SourceManager.h" | 
|  | #include "llvm/ADT/ArrayRef.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/StringExtras.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  | #include "llvm/Support/ConvertUTF.h" | 
|  | #include "llvm/Support/JSON.h" | 
|  | #include "llvm/Support/Path.h" | 
|  |  | 
|  | #include <optional> | 
|  | #include <string> | 
|  | #include <utility> | 
|  |  | 
|  | using namespace clang; | 
|  | using namespace llvm; | 
|  |  | 
|  | using clang::detail::SarifArtifact; | 
|  | using clang::detail::SarifArtifactLocation; | 
|  |  | 
|  | static StringRef getFileName(FileEntryRef FE) { | 
|  | StringRef Filename = FE.getFileEntry().tryGetRealPathName(); | 
|  | if (Filename.empty()) | 
|  | Filename = FE.getName(); | 
|  | return Filename; | 
|  | } | 
|  | /// \name URI | 
|  | /// @{ | 
|  |  | 
|  | /// \internal | 
|  | /// \brief | 
|  | /// Return the RFC3986 encoding of the input character. | 
|  | /// | 
|  | /// \param C Character to encode to RFC3986. | 
|  | /// | 
|  | /// \return The RFC3986 representation of \c C. | 
|  | static std::string percentEncodeURICharacter(char C) { | 
|  | // RFC 3986 claims alpha, numeric, and this handful of | 
|  | // characters are not reserved for the path component and | 
|  | // should be written out directly. Otherwise, percent | 
|  | // encode the character and write that out instead of the | 
|  | // reserved character. | 
|  | if (llvm::isAlnum(C) || | 
|  | StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C)) | 
|  | return std::string(&C, 1); | 
|  | return "%" + llvm::toHex(StringRef(&C, 1)); | 
|  | } | 
|  |  | 
|  | /// \internal | 
|  | /// \brief Return a URI representing the given file name. | 
|  | /// | 
|  | /// \param Filename The filename to be represented as URI. | 
|  | /// | 
|  | /// \return RFC3986 URI representing the input file name. | 
|  | static std::string fileNameToURI(StringRef Filename) { | 
|  | SmallString<32> Ret = StringRef("file://"); | 
|  |  | 
|  | // Get the root name to see if it has a URI authority. | 
|  | StringRef Root = sys::path::root_name(Filename); | 
|  | if (Root.starts_with("//")) { | 
|  | // There is an authority, so add it to the URI. | 
|  | Ret += Root.drop_front(2).str(); | 
|  | } else if (!Root.empty()) { | 
|  | // There is no authority, so end the component and add the root to the URI. | 
|  | Ret += Twine("/" + Root).str(); | 
|  | } | 
|  |  | 
|  | auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename); | 
|  | assert(Iter != End && "Expected there to be a non-root path component."); | 
|  | // Add the rest of the path components, encoding any reserved characters; | 
|  | // we skip past the first path component, as it was handled it above. | 
|  | for (StringRef Component : llvm::make_range(++Iter, End)) { | 
|  | // For reasons unknown to me, we may get a backslash with Windows native | 
|  | // paths for the initial backslash following the drive component, which | 
|  | // we need to ignore as a URI path part. | 
|  | if (Component == "\\") | 
|  | continue; | 
|  |  | 
|  | // Add the separator between the previous path part and the one being | 
|  | // currently processed. | 
|  | Ret += "/"; | 
|  |  | 
|  | // URI encode the part. | 
|  | for (char C : Component) { | 
|  | Ret += percentEncodeURICharacter(C); | 
|  | } | 
|  | } | 
|  |  | 
|  | return std::string(Ret); | 
|  | } | 
|  | ///  @} | 
|  |  | 
|  | /// \brief Calculate the column position expressed in the number of UTF-8 code | 
|  | /// points from column start to the source location | 
|  | /// | 
|  | /// \param Loc The source location whose column needs to be calculated. | 
|  | /// \param TokenLen Optional hint for when the token is multiple bytes long. | 
|  | /// | 
|  | /// \return The column number as a UTF-8 aware byte offset from column start to | 
|  | /// the effective source location. | 
|  | static unsigned int adjustColumnPos(FullSourceLoc Loc, | 
|  | unsigned int TokenLen = 0) { | 
|  | assert(!Loc.isInvalid() && "invalid Loc when adjusting column position"); | 
|  |  | 
|  | std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); | 
|  | std::optional<MemoryBufferRef> Buf = | 
|  | Loc.getManager().getBufferOrNone(LocInfo.first); | 
|  | assert(Buf && "got an invalid buffer for the location's file"); | 
|  | assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && | 
|  | "token extends past end of buffer?"); | 
|  |  | 
|  | // Adjust the offset to be the start of the line, since we'll be counting | 
|  | // Unicode characters from there until our column offset. | 
|  | unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); | 
|  | unsigned int Ret = 1; | 
|  | while (Off < (LocInfo.second + TokenLen)) { | 
|  | Off += getNumBytesForUTF8(Buf->getBuffer()[Off]); | 
|  | Ret++; | 
|  | } | 
|  |  | 
|  | return Ret; | 
|  | } | 
|  |  | 
|  | /// \name SARIF Utilities | 
|  | /// @{ | 
|  |  | 
|  | /// \internal | 
|  | json::Object createMessage(StringRef Text) { | 
|  | return json::Object{{"text", Text.str()}}; | 
|  | } | 
|  |  | 
|  | /// \internal | 
|  | /// \pre CharSourceRange must be a token range | 
|  | static json::Object createTextRegion(const SourceManager &SM, | 
|  | const CharSourceRange &R) { | 
|  | FullSourceLoc BeginCharLoc{R.getBegin(), SM}; | 
|  | FullSourceLoc EndCharLoc{R.getEnd(), SM}; | 
|  | json::Object Region{{"startLine", BeginCharLoc.getExpansionLineNumber()}, | 
|  | {"startColumn", adjustColumnPos(BeginCharLoc)}}; | 
|  |  | 
|  | if (BeginCharLoc == EndCharLoc) { | 
|  | Region["endColumn"] = adjustColumnPos(BeginCharLoc); | 
|  | } else { | 
|  | Region["endLine"] = EndCharLoc.getExpansionLineNumber(); | 
|  | Region["endColumn"] = adjustColumnPos(EndCharLoc); | 
|  | } | 
|  | return Region; | 
|  | } | 
|  |  | 
|  | static json::Object createLocation(json::Object &&PhysicalLocation, | 
|  | StringRef Message = "") { | 
|  | json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; | 
|  | if (!Message.empty()) | 
|  | Ret.insert({"message", createMessage(Message)}); | 
|  | return Ret; | 
|  | } | 
|  |  | 
|  | static StringRef importanceToStr(ThreadFlowImportance I) { | 
|  | switch (I) { | 
|  | case ThreadFlowImportance::Important: | 
|  | return "important"; | 
|  | case ThreadFlowImportance::Essential: | 
|  | return "essential"; | 
|  | case ThreadFlowImportance::Unimportant: | 
|  | return "unimportant"; | 
|  | } | 
|  | llvm_unreachable("Fully covered switch is not so fully covered"); | 
|  | } | 
|  |  | 
|  | static StringRef resultLevelToStr(SarifResultLevel R) { | 
|  | switch (R) { | 
|  | case SarifResultLevel::None: | 
|  | return "none"; | 
|  | case SarifResultLevel::Note: | 
|  | return "note"; | 
|  | case SarifResultLevel::Warning: | 
|  | return "warning"; | 
|  | case SarifResultLevel::Error: | 
|  | return "error"; | 
|  | } | 
|  | llvm_unreachable("Potentially un-handled SarifResultLevel. " | 
|  | "Is the switch not fully covered?"); | 
|  | } | 
|  |  | 
|  | static json::Object | 
|  | createThreadFlowLocation(json::Object &&Location, | 
|  | const ThreadFlowImportance &Importance) { | 
|  | return json::Object{{"location", std::move(Location)}, | 
|  | {"importance", importanceToStr(Importance)}}; | 
|  | } | 
|  | ///  @} | 
|  |  | 
|  | json::Object | 
|  | SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { | 
|  | assert(R.isValid() && | 
|  | "Cannot create a physicalLocation from invalid SourceRange!"); | 
|  | assert(R.isCharRange() && | 
|  | "Cannot create a physicalLocation from a token range!"); | 
|  | FullSourceLoc Start{R.getBegin(), SourceMgr}; | 
|  | OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); | 
|  | assert(FE && "Diagnostic does not exist within a valid file!"); | 
|  |  | 
|  | const std::string &FileURI = fileNameToURI(getFileName(*FE)); | 
|  | auto I = CurrentArtifacts.find(FileURI); | 
|  |  | 
|  | if (I == CurrentArtifacts.end()) { | 
|  | uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); | 
|  | const SarifArtifactLocation &Location = | 
|  | SarifArtifactLocation::create(FileURI).setIndex(Idx); | 
|  | const SarifArtifact &Artifact = SarifArtifact::create(Location) | 
|  | .setRoles({"resultFile"}) | 
|  | .setLength(FE->getSize()) | 
|  | .setMimeType("text/plain"); | 
|  | auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact}); | 
|  | // If inserted, ensure the original iterator points to the newly inserted | 
|  | // element, so it can be used downstream. | 
|  | if (StatusIter.second) | 
|  | I = StatusIter.first; | 
|  | } | 
|  | assert(I != CurrentArtifacts.end() && "Failed to insert new artifact"); | 
|  | const SarifArtifactLocation &Location = I->second.Location; | 
|  | json::Object ArtifactLocationObject{{"uri", Location.URI}}; | 
|  | if (Location.Index.has_value()) | 
|  | ArtifactLocationObject["index"] = *Location.Index; | 
|  | return json::Object{{{"artifactLocation", std::move(ArtifactLocationObject)}, | 
|  | {"region", createTextRegion(SourceMgr, R)}}}; | 
|  | } | 
|  |  | 
|  | json::Object &SarifDocumentWriter::getCurrentTool() { | 
|  | assert(!Closed && "SARIF Document is closed. " | 
|  | "Need to call createRun() before using getcurrentTool!"); | 
|  |  | 
|  | // Since Closed = false here, expect there to be at least 1 Run, anything | 
|  | // else is an invalid state. | 
|  | assert(!Runs.empty() && "There are no runs associated with the document!"); | 
|  |  | 
|  | return *Runs.back().getAsObject()->get("tool")->getAsObject(); | 
|  | } | 
|  |  | 
|  | void SarifDocumentWriter::reset() { | 
|  | CurrentRules.clear(); | 
|  | CurrentArtifacts.clear(); | 
|  | } | 
|  |  | 
|  | void SarifDocumentWriter::endRun() { | 
|  | // Exit early if trying to close a closed Document. | 
|  | if (Closed) { | 
|  | reset(); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Since Closed = false here, expect there to be at least 1 Run, anything | 
|  | // else is an invalid state. | 
|  | assert(!Runs.empty() && "There are no runs associated with the document!"); | 
|  |  | 
|  | // Flush all the rules. | 
|  | json::Object &Tool = getCurrentTool(); | 
|  | json::Array Rules; | 
|  | for (const SarifRule &R : CurrentRules) { | 
|  | json::Object Config{ | 
|  | {"enabled", R.DefaultConfiguration.Enabled}, | 
|  | {"level", resultLevelToStr(R.DefaultConfiguration.Level)}, | 
|  | {"rank", R.DefaultConfiguration.Rank}}; | 
|  | json::Object Rule{ | 
|  | {"name", R.Name}, | 
|  | {"id", R.Id}, | 
|  | {"fullDescription", json::Object{{"text", R.Description}}}, | 
|  | {"defaultConfiguration", std::move(Config)}}; | 
|  | if (!R.HelpURI.empty()) | 
|  | Rule["helpUri"] = R.HelpURI; | 
|  | Rules.emplace_back(std::move(Rule)); | 
|  | } | 
|  | json::Object &Driver = *Tool.getObject("driver"); | 
|  | Driver["rules"] = std::move(Rules); | 
|  |  | 
|  | // Flush all the artifacts. | 
|  | json::Object &Run = getCurrentRun(); | 
|  | json::Array *Artifacts = Run.getArray("artifacts"); | 
|  | SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; | 
|  | for (const auto &[K, V] : CurrentArtifacts) | 
|  | Vec.emplace_back(K, V); | 
|  | llvm::sort(Vec, llvm::less_first()); | 
|  | for (const auto &[_, A] : Vec) { | 
|  | json::Object Loc{{"uri", A.Location.URI}}; | 
|  | if (A.Location.Index.has_value()) { | 
|  | Loc["index"] = static_cast<int64_t>(*A.Location.Index); | 
|  | } | 
|  | json::Object Artifact; | 
|  | Artifact["location"] = std::move(Loc); | 
|  | if (A.Length.has_value()) | 
|  | Artifact["length"] = static_cast<int64_t>(*A.Length); | 
|  | if (!A.Roles.empty()) | 
|  | Artifact["roles"] = json::Array(A.Roles); | 
|  | if (!A.MimeType.empty()) | 
|  | Artifact["mimeType"] = A.MimeType; | 
|  | if (A.Offset.has_value()) | 
|  | Artifact["offset"] = *A.Offset; | 
|  | Artifacts->push_back(json::Value(std::move(Artifact))); | 
|  | } | 
|  |  | 
|  | // Clear, reset temporaries before next run. | 
|  | reset(); | 
|  |  | 
|  | // Mark the document as closed. | 
|  | Closed = true; | 
|  | } | 
|  |  | 
|  | json::Array | 
|  | SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { | 
|  | json::Object Ret{{"locations", json::Array{}}}; | 
|  | json::Array Locs; | 
|  | for (const auto &ThreadFlow : ThreadFlows) { | 
|  | json::Object PLoc = createPhysicalLocation(ThreadFlow.Range); | 
|  | json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message); | 
|  | Locs.emplace_back( | 
|  | createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance)); | 
|  | } | 
|  | Ret["locations"] = std::move(Locs); | 
|  | return json::Array{std::move(Ret)}; | 
|  | } | 
|  |  | 
|  | json::Object | 
|  | SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { | 
|  | return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}}; | 
|  | } | 
|  |  | 
|  | void SarifDocumentWriter::createRun(StringRef ShortToolName, | 
|  | StringRef LongToolName, | 
|  | StringRef ToolVersion) { | 
|  | // Clear resources associated with a previous run. | 
|  | endRun(); | 
|  |  | 
|  | // Signify a new run has begun. | 
|  | Closed = false; | 
|  |  | 
|  | json::Object Tool{ | 
|  | {"driver", | 
|  | json::Object{{"name", ShortToolName}, | 
|  | {"fullName", LongToolName}, | 
|  | {"language", "en-US"}, | 
|  | {"version", ToolVersion}, | 
|  | {"informationUri", | 
|  | "https://clang.llvm.org/docs/UsersManual.html"}}}}; | 
|  | json::Object TheRun{{"tool", std::move(Tool)}, | 
|  | {"results", {}}, | 
|  | {"artifacts", {}}, | 
|  | {"columnKind", "unicodeCodePoints"}}; | 
|  | Runs.emplace_back(std::move(TheRun)); | 
|  | } | 
|  |  | 
|  | json::Object &SarifDocumentWriter::getCurrentRun() { | 
|  | assert(!Closed && | 
|  | "SARIF Document is closed. " | 
|  | "Can only getCurrentRun() if document is opened via createRun(), " | 
|  | "create a run first"); | 
|  |  | 
|  | // Since Closed = false here, expect there to be at least 1 Run, anything | 
|  | // else is an invalid state. | 
|  | assert(!Runs.empty() && "There are no runs associated with the document!"); | 
|  | return *Runs.back().getAsObject(); | 
|  | } | 
|  |  | 
|  | size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { | 
|  | size_t Ret = CurrentRules.size(); | 
|  | CurrentRules.emplace_back(Rule); | 
|  | return Ret; | 
|  | } | 
|  |  | 
|  | void SarifDocumentWriter::appendResult(const SarifResult &Result) { | 
|  | size_t RuleIdx = Result.RuleIdx; | 
|  | assert(RuleIdx < CurrentRules.size() && | 
|  | "Trying to reference a rule that doesn't exist"); | 
|  | const SarifRule &Rule = CurrentRules[RuleIdx]; | 
|  | assert(Rule.DefaultConfiguration.Enabled && | 
|  | "Cannot add a result referencing a disabled Rule"); | 
|  | json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)}, | 
|  | {"ruleIndex", static_cast<int64_t>(RuleIdx)}, | 
|  | {"ruleId", Rule.Id}}; | 
|  | if (!Result.Locations.empty()) { | 
|  | json::Array Locs; | 
|  | for (auto &Range : Result.Locations) { | 
|  | Locs.emplace_back(createLocation(createPhysicalLocation(Range))); | 
|  | } | 
|  | Ret["locations"] = std::move(Locs); | 
|  | } | 
|  | if (!Result.ThreadFlows.empty()) | 
|  | Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)}; | 
|  |  | 
|  | Ret["level"] = resultLevelToStr( | 
|  | Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level)); | 
|  |  | 
|  | json::Object &Run = getCurrentRun(); | 
|  | json::Array *Results = Run.getArray("results"); | 
|  | Results->emplace_back(std::move(Ret)); | 
|  | } | 
|  |  | 
|  | json::Object SarifDocumentWriter::createDocument() { | 
|  | // Flush all temporaries to their destinations if needed. | 
|  | endRun(); | 
|  |  | 
|  | json::Object Doc{ | 
|  | {"$schema", SchemaURI}, | 
|  | {"version", SchemaVersion}, | 
|  | }; | 
|  | if (!Runs.empty()) | 
|  | Doc["runs"] = json::Array(Runs); | 
|  | return Doc; | 
|  | } |