[libFuzzer] automatically collect the data flow trace (DFT) in the fork mode if -collect_data_flow= is given

llvm-svn: 361448
GitOrigin-RevId: b7cc3d9953312b1f31b673429fcd49864cae118b
diff --git a/FuzzerDataFlowTrace.cpp b/FuzzerDataFlowTrace.cpp
index 975a98a..74bfa8f 100644
--- a/FuzzerDataFlowTrace.cpp
+++ b/FuzzerDataFlowTrace.cpp
@@ -174,6 +174,8 @@
     if (*FocusFunction == L)
       FocusFuncIdx = NumFunctions - 1;
   }
+  if (!NumFunctions)
+    return false;
 
   if (*FocusFunction == "auto") {
     // AUTOFOCUS works like this:
diff --git a/FuzzerDriver.cpp b/FuzzerDriver.cpp
index 95ce8bb..d55caf4 100644
--- a/FuzzerDriver.cpp
+++ b/FuzzerDriver.cpp
@@ -700,6 +700,8 @@
     Options.DataFlowTrace = Flags.data_flow_trace;
   if (Flags.features_dir)
     Options.FeaturesDir = Flags.features_dir;
+  if (Flags.collect_data_flow)
+    Options.CollectDataFlow = Flags.collect_data_flow;
   Options.LazyCounters = Flags.lazy_counters;
 
   unsigned Seed = Flags.seed;
@@ -710,7 +712,7 @@
   if (Flags.verbosity)
     Printf("INFO: Seed: %u\n", Seed);
 
-  if (Flags.collect_data_flow) {
+  if (Flags.collect_data_flow && !Flags.fork && !Flags.merge) {
     if (RunIndividualFiles)
       return CollectDataFlow(Flags.collect_data_flow, Flags.data_flow_trace,
                         ReadCorpora({}, *Inputs));
diff --git a/FuzzerFork.cpp b/FuzzerFork.cpp
index dd16ec1..870a224 100644
--- a/FuzzerFork.cpp
+++ b/FuzzerFork.cpp
@@ -86,6 +86,8 @@
   Vector<std::string> CorpusDirs;
   std::string MainCorpusDir;
   std::string TempDir;
+  std::string DFTDir;
+  std::string DataFlowBinary;
   Set<uint32_t> Features, Cov;
   Vector<std::string> Files;
   Random *Rand;
@@ -109,13 +111,18 @@
     Command Cmd(Args);
     Cmd.removeFlag("fork");
     Cmd.removeFlag("runs");
+    Cmd.removeFlag("collect_data_flow");
     for (auto &C : CorpusDirs) // Remove all corpora from the args.
       Cmd.removeArgument(C);
     Cmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
     Cmd.addFlag("print_final_stats", "1");
     Cmd.addFlag("print_funcs", "0");  // no need to spend time symbolizing.
     Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
-
+    if (!DataFlowBinary.empty()) {
+      Cmd.addFlag("data_flow_trace", DFTDir);
+      if (!Cmd.hasFlag("focus_function"))
+        Cmd.addFlag("focus_function", "auto");
+    }
     auto Job = new FuzzJob;
     std::string Seeds;
     if (size_t CorpusSubsetSize =
@@ -124,7 +131,8 @@
         Seeds += (Seeds.empty() ? "" : ",") +
                  Files[Rand->SkewTowardsLast(Files.size())];
     if (!Seeds.empty()) {
-      Job->SeedListPath = std::to_string(JobId) + ".seeds";
+      Job->SeedListPath =
+          DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
       WriteToFile(Seeds, Job->SeedListPath);
       Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
     }
@@ -188,6 +196,7 @@
       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
       WriteToFile(U, NewPath);
       Files.push_back(NewPath);
+      CollectDFT(NewPath);
     }
     Features.insert(NewFeatures.begin(), NewFeatures.end());
     Cov.insert(NewCov.begin(), NewCov.end());
@@ -204,6 +213,23 @@
              Stats.average_exec_per_sec,
              NumOOMs, NumTimeouts, NumCrashes, secondsSinceProcessStartUp());
   }
+
+  // Runs a copy of Args on one input with -data_flow_trace=DFTDir added.
+  void CollectDFT(const std::string &InputPath) {
+    if (DataFlowBinary.empty()) return;  // No-op unless DataFlowBinary is set.
+    Command Cmd(Args);
+    Cmd.removeFlag("fork");
+    Cmd.removeFlag("runs");  // NOTE(review): collect_data_flow stays; confirm.
+    Cmd.addFlag("data_flow_trace", DFTDir);
+    Cmd.addArgument(InputPath);
+    for (auto &C : CorpusDirs) // Remove all corpora from the args.
+      Cmd.removeArgument(C);
+    Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));  // Same log every call.
+    Cmd.combineOutAndErr();
+    // Printf("CollectDFT: %s %s\n", InputPath.c_str(), Cmd.toString().c_str());
+    ExecuteCommand(Cmd);  // Return value is not checked.
+  }
+
 };
 
 struct JobQueue {
@@ -248,14 +274,17 @@
   Env.Rand = &Rand;
   Env.Verbosity = Options.Verbosity;
   Env.ProcessStartTime = std::chrono::system_clock::now();
+  Env.DataFlowBinary = Options.CollectDataFlow;
 
   Vector<SizedFile> SeedFiles;
   for (auto &Dir : CorpusDirs)
     GetSizedFilesFromDir(Dir, &SeedFiles);
   std::sort(SeedFiles.begin(), SeedFiles.end());
   Env.TempDir = TempPath(".dir");
+  Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
   RmDirRecursive(Env.TempDir);  // in case there is a leftover from old runs.
   MkDir(Env.TempDir);
+  MkDir(Env.DFTDir);
 
 
   if (CorpusDirs.empty())
@@ -267,6 +296,9 @@
   CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
                       {}, &Env.Cov,
                       CFPath, false);
+  for (auto &F : Env.Files)
+    Env.CollectDFT(F);
+
   RemoveFile(CFPath);
   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
          Env.Files.size(), Env.TempDir.c_str());
diff --git a/FuzzerMerge.cpp b/FuzzerMerge.cpp
index dace45e..75b2b5d 100644
--- a/FuzzerMerge.cpp
+++ b/FuzzerMerge.cpp
@@ -324,6 +324,7 @@
   Command BaseCmd(Args);
   BaseCmd.removeFlag("merge");
   BaseCmd.removeFlag("fork");
+  BaseCmd.removeFlag("collect_data_flow");
   for (size_t Attempt = 1; Attempt <= NumAttempts; Attempt++) {
     Fuzzer::MaybeExitGracefully();
     VPrintf(V, "MERGE-OUTER: attempt %zd\n", Attempt);
diff --git a/FuzzerOptions.h b/FuzzerOptions.h
index d48439d..687f2ff 100644
--- a/FuzzerOptions.h
+++ b/FuzzerOptions.h
@@ -51,6 +51,7 @@
   std::string ExitOnItem;
   std::string FocusFunction;
   std::string DataFlowTrace;
+  std::string CollectDataFlow;
   std::string FeaturesDir;
   bool SaveArtifacts = true;
   bool PrintNEW = true; // Print a status line when new units are found;