[clangd] Add library to semantically strip flags by name.

Summary:
This is designed for tweaking compile commands by specifying flags to add/remove
in a config file. Something like:
  CompileFlags: { Remove: -fcolor-diagnostics }

Having users tweak raw argv (e.g. with a regex) is going to end in tears: bugs
around clang-cl, xclang, aliases, joined-vs-separate args etc are inevitable.

This isn't in tooling because of the performance choices: build a big table
up-front to make subsequent actions fast. Maybe it should be though.

Reviewers: adamcz, hokein

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D81958
diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp
index 4b69555..4731221 100644
--- a/clang-tools-extra/clangd/CompileCommands.cpp
+++ b/clang-tools-extra/clangd/CompileCommands.cpp
@@ -9,8 +9,12 @@
 #include "CompileCommands.h"
 #include "Config.h"
 #include "support/Logger.h"
+#include "clang/Driver/Options.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Tooling/ArgumentsAdjusters.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -234,5 +238,269 @@
   };
 }
 
+// ArgStripper implementation
+namespace {
+
+// Determine total number of args consumed by this option.
+// Return answers for {Exact, Prefix} match. 0 means not allowed.
+std::pair<unsigned, unsigned> getArgCount(const llvm::opt::Option &Opt) {
+  constexpr static unsigned Rest = 10000; // Should be all the rest!
+  // Reference is llvm::opt::Option::acceptInternal()
+  using llvm::opt::Option;
+  switch (Opt.getKind()) {
+  case Option::FlagClass:
+    return {1, 0};
+  case Option::JoinedClass:
+  case Option::CommaJoinedClass:
+    return {1, 1};
+  case Option::GroupClass:
+  case Option::InputClass:
+  case Option::UnknownClass:
+  case Option::ValuesClass:
+    return {1, 0};
+  case Option::JoinedAndSeparateClass:
+    return {2, 2};
+  case Option::SeparateClass:
+    return {2, 0};
+  case Option::MultiArgClass:
+    return {1 + Opt.getNumArgs(), 0};
+  case Option::JoinedOrSeparateClass:
+    return {2, 1};
+  case Option::RemainingArgsClass:
+    return {Rest, 0};
+  case Option::RemainingArgsJoinedClass:
+    return {Rest, Rest};
+  }
+}
+
+// Flag-parsing mode, which affects which flags are available.
+enum DriverMode : unsigned char {
+  DM_None = 0,
+  DM_GCC = 1, // Default mode e.g. when invoked as 'clang'
+  DM_CL = 2,  // MS CL.exe compatible mode e.g. when invoked as 'clang-cl'
+  DM_CC1 = 4, // When invoked as 'clang -cc1' or after '-Xclang'
+  DM_All = 7
+};
+
+// Examine args list to determine if we're in GCC, CL-compatible, or cc1 mode.
+DriverMode getDriverMode(const std::vector<std::string> &Args) {
+  DriverMode Mode = DM_GCC;
+  llvm::StringRef Argv0 = Args.front();
+  if (Argv0.endswith_lower(".exe"))
+    Argv0 = Argv0.drop_back(strlen(".exe"));
+  if (Argv0.endswith_lower("cl"))
+    Mode = DM_CL;
+  for (const llvm::StringRef Arg : Args) {
+    if (Arg == "--driver-mode=cl") {
+      Mode = DM_CL;
+      break;
+    }
+    if (Arg == "-cc1") {
+      Mode = DM_CC1;
+      break;
+    }
+  }
+  return Mode;
+}
+
+// Returns the set of DriverModes where an option may be used.
+unsigned char getModes(const llvm::opt::Option &Opt) {
+  // Why is this so complicated?!
+  // Reference is clang::driver::Driver::getIncludeExcludeOptionFlagMasks()
+  unsigned char Result = DM_None;
+  if (Opt.hasFlag(driver::options::CC1Option))
+    Result |= DM_CC1;
+  if (!Opt.hasFlag(driver::options::NoDriverOption)) {
+    if (Opt.hasFlag(driver::options::CLOption)) {
+      Result |= DM_CL;
+    } else {
+      Result |= DM_GCC;
+      if (Opt.hasFlag(driver::options::CoreOption)) {
+        Result |= DM_CL;
+      }
+    }
+  }
+  return Result;
+};
+
+} // namespace
+
+llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
+  // All the hard work is done once in a static initializer.
+  // We compute a table containing strings to look for and #args to skip.
+  // e.g. "-x" => {-x 2 args, -x* 1 arg, --language 2 args, --language=* 1 arg}
+  using TableTy =
+      llvm::StringMap<llvm::SmallVector<Rule, 4>, llvm::BumpPtrAllocator>;
+  static TableTy *Table = [] {
+    auto &DriverTable = driver::getDriverOptTable();
+    using DriverID = clang::driver::options::ID;
+
+    // Collect sets of aliases, so we can treat -foo and -foo= as synonyms.
+    // Conceptually a double-linked list: PrevAlias[I] -> I -> NextAlias[I].
+    // If PrevAlias[I] is INVALID, then I is canonical.
+    DriverID PrevAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
+    DriverID NextAlias[DriverID::LastOption] = {DriverID::OPT_INVALID};
+    auto AddAlias = [&](DriverID Self, DriverID T) {
+      if (NextAlias[T]) {
+        PrevAlias[NextAlias[T]] = Self;
+        NextAlias[Self] = NextAlias[T];
+      }
+      PrevAlias[Self] = T;
+      NextAlias[T] = Self;
+    };
+    // Also grab prefixes for each option, these are not fully exposed.
+    const char *const *Prefixes[DriverID::LastOption] = {nullptr};
+#define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELP, METAVAR, VALUES)                                          \
+  if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr)  \
+    AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS);                       \
+  Prefixes[DriverID::OPT_##ID] = PREFIX;
+#include "clang/Driver/Options.inc"
+#undef OPTION
+#undef PREFIX
+
+    auto Result = std::make_unique<TableTy>();
+    // Iterate over distinct options (represented by the canonical alias).
+    // Every spelling of this option will get the same set of rules.
+    for (unsigned ID = 1 /*Skip INVALID */; ID < DriverID::LastOption; ++ID) {
+      if (PrevAlias[ID] || ID == DriverID::OPT_Xclang)
+        continue; // Not canonical, or specially handled.
+      llvm::SmallVector<Rule, 8> Rules;
+      // Iterate over each alias, to add rules for parsing it.
+      for (unsigned A = ID; A != DriverID::OPT_INVALID; A = NextAlias[A]) {
+        if (Prefixes[A] == nullptr) // option groups.
+          continue;
+        auto Opt = DriverTable.getOption(A);
+        // Exclude - and -foo pseudo-options.
+        if (Opt.getName().empty())
+          continue;
+        auto Modes = getModes(Opt);
+        std::pair<unsigned, unsigned> ArgCount = getArgCount(Opt);
+        // Iterate over each spelling of the alias, e.g. -foo vs --foo.
+        for (auto *Prefix = Prefixes[A]; *Prefix != nullptr; ++Prefix) {
+          llvm::SmallString<64> Buf(*Prefix);
+          Buf.append(Opt.getName());
+          llvm::StringRef Spelling = Result->try_emplace(Buf).first->getKey();
+          Rules.emplace_back();
+          Rule &R = Rules.back();
+          R.Text = Spelling;
+          R.Modes = Modes;
+          R.ExactArgs = ArgCount.first;
+          R.PrefixArgs = ArgCount.second;
+          // Concrete priority is the index into the option table.
+          // Effectively, earlier entries take priority over later ones.
+          assert(ID < std::numeric_limits<decltype(R.Priority)>::max() &&
+                 "Rules::Priority overflowed by options table");
+          R.Priority = ID;
+        }
+      }
+      // Register the set of rules under each possible name.
+      for (const auto &R : Rules)
+        Result->find(R.Text)->second.append(Rules.begin(), Rules.end());
+    }
+#ifndef NDEBUG
+    // Dump the table and various measures of its size.
+    unsigned RuleCount = 0;
+    dlog("ArgStripper Option spelling table");
+    for (const auto &Entry : *Result) {
+      dlog("{0}", Entry.first());
+      RuleCount += Entry.second.size();
+      for (const auto &R : Entry.second)
+        dlog("  {0} #={1} *={2} Mode={3}", R.Text, R.ExactArgs, R.PrefixArgs,
+             int(R.Modes));
+    }
+    dlog("Table spellings={0} rules={1} string-bytes={2}", Result->size(),
+         RuleCount, Result->getAllocator().getBytesAllocated());
+#endif
+    // The static table will never be destroyed.
+    return Result.release();
+  }();
+
+  auto It = Table->find(Arg);
+  return (It == Table->end()) ? llvm::ArrayRef<Rule>() : It->second;
+}
+
+void ArgStripper::strip(llvm::StringRef Arg) {
+  auto OptionRules = rulesFor(Arg);
+  if (OptionRules.empty()) {
+    // Not a recognized flag. Strip it literally.
+    Storage.emplace_back(Arg);
+    Rules.emplace_back();
+    Rules.back().Text = Storage.back();
+    Rules.back().ExactArgs = 1;
+    if (Rules.back().Text.consume_back("*"))
+      Rules.back().PrefixArgs = 1;
+    Rules.back().Modes = DM_All;
+    Rules.back().Priority = -1; // Max unsigned = lowest priority.
+  } else {
+    Rules.append(OptionRules.begin(), OptionRules.end());
+  }
+}
+
+const ArgStripper::Rule *ArgStripper::matchingRule(llvm::StringRef Arg,
+                                                   unsigned Mode,
+                                                   unsigned &ArgCount) const {
+  const ArgStripper::Rule *BestRule = nullptr;
+  for (const Rule &R : Rules) {
+    // Rule can fail to match if...
+    if (!(R.Modes & Mode))
+      continue; // not applicable to current driver mode
+    if (BestRule && BestRule->Priority < R.Priority)
+      continue; // lower-priority than best candidate.
+    if (!Arg.startswith(R.Text))
+      continue; // current arg doesn't match the prefix string
+    bool PrefixMatch = Arg.size() > R.Text.size();
+    // Can rule apply as an exact/prefix match?
+    if (unsigned Count = PrefixMatch ? R.PrefixArgs : R.ExactArgs) {
+      BestRule = &R;
+      ArgCount = Count;
+    }
+    // Continue in case we find a higher-priority rule.
+  }
+  return BestRule;
+}
+
+void ArgStripper::process(std::vector<std::string> &Args) const {
+  if (Args.empty())
+    return;
+
+  // We're parsing the args list in some mode (e.g. gcc-compatible) but may
+  // temporarily switch to another mode with the -Xclang flag.
+  DriverMode MainMode = getDriverMode(Args);
+  DriverMode CurrentMode = MainMode;
+
+  // Read and write heads for in-place deletion.
+  unsigned Read = 0, Write = 0;
+  bool WasXclang = false;
+  while (Read < Args.size()) {
+    unsigned ArgCount = 0;
+    if (const Rule *R = matchingRule(Args[Read], CurrentMode, ArgCount)) {
+      // Delete it and its args.
+      if (WasXclang) {
+        assert(Write > 0);
+        --Write; // Drop previous -Xclang arg
+        CurrentMode = MainMode;
+        WasXclang = false;
+      }
+      // Advance to last arg. An arg may be foo or -Xclang foo.
+      for (unsigned I = 1; Read < Args.size() && I < ArgCount; ++I) {
+        ++Read;
+        if (Read < Args.size() && Args[Read] == "-Xclang")
+          ++Read;
+      }
+    } else {
+      // No match, just copy the arg through.
+      WasXclang = Args[Read] == "-Xclang";
+      CurrentMode = WasXclang ? DM_CC1 : MainMode;
+      if (Write != Read)
+        Args[Write] = std::move(Args[Read]);
+      ++Write;
+    }
+    ++Read;
+  }
+  Args.resize(Write);
+}
+
 } // namespace clangd
 } // namespace clang