[libFuzzer] a bit of refactoring of the fork mode

llvm-svn: 353910
GitOrigin-RevId: d085748484193f3b4f9c73fa339b4390f7092f41
diff --git a/FuzzerFork.cpp b/FuzzerFork.cpp
index bb0ea2a..5126fe7 100644
--- a/FuzzerFork.cpp
+++ b/FuzzerFork.cpp
@@ -5,7 +5,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// Spawn an orchestrate separate fuzzing processes.
+// Spawn and orchestrate separate fuzzing processes.
 //===----------------------------------------------------------------------===//
 
 #include "FuzzerCommand.h"
@@ -17,6 +17,65 @@
 
 namespace fuzzer {
 
+struct FuzzJob {
+  // Inputs: filled in by the caller before RunOneFuzzingJob is invoked.
+  Command Cmd;                // Command line used to start the child.
+  Vector<std::string> Files;  // Seeds joined into the seed_inputs flag.
+  std::string CorpusDir;      // Scratch corpus dir, recreated for each run.
+  std::string LogPath;        // Output file set on the child command.
+  std::string CFPath;         // Control file handed to the merge step.
+  int MaxTotalTimeSec = 0;    // Value of the child's max_total_time flag.
+
+  // Fuzzing outputs: written by RunOneFuzzingJob.
+  int ExitCode = 0;  // Result of ExecuteCommand; 0 until a run stores it.
+};
+
+struct GlobalEnv {  // State carried across all fuzz/merge jobs.
+  const Vector<std::string> *Args = nullptr;  // Not owned; set by the caller.
+  std::string MainCorpusDir;  // Dir that receives the merged-in inputs.
+  Set<uint32_t> Features;     // Features accumulated by the merges so far.
+  Vector<std::string> Files;  // Paths of the inputs kept so far.
+};
+
+void RunOneFuzzingJob(FuzzJob *Job) {  // Runs one child; sets Job->ExitCode.
+  Command &ChildCmd = Job->Cmd;
+  if (!Job->Files.empty()) {
+    std::string SeedList;  // Comma-separated list of the job's seed files.
+    for (const std::string &Seed : Job->Files)
+      SeedList += (SeedList.empty() ? "" : ",") + Seed;
+    ChildCmd.addFlag("seed_inputs", SeedList);
+  }
+  ChildCmd.addFlag("max_total_time", std::to_string(Job->MaxTotalTimeSec));
+  ChildCmd.setOutputFile(Job->LogPath);
+  ChildCmd.combineOutAndErr();
+  ChildCmd.addArgument(Job->CorpusDir);
+  RmDirRecursive(Job->CorpusDir);  // Clear out the previous run's corpus dir.
+  MkDir(Job->CorpusDir);
+  Job->ExitCode = ExecuteCommand(ChildCmd);
+}
+
+void RunOneMergeJob(GlobalEnv *Env, FuzzJob *Job) {  // Folds Job into Env.
+  Vector<SizedFile> JobOutputs;  // Files listed from the job's corpus dir.
+  GetSizedFilesFromDir(Job->CorpusDir, &JobOutputs);
+
+  Vector<std::string> Accepted;
+  Set<uint32_t> FreshFeatures;
+  CrashResistantMerge(*Env->Args, {}, JobOutputs, &Accepted, Env->Features,
+                      &FreshFeatures, Job->CFPath, false);
+  RemoveFile(Job->CFPath);
+  for (auto &SrcPath : Accepted) {
+    auto Bytes = FileToVector(SrcPath);
+    auto DstPath = DirPlusFile(Env->MainCorpusDir, Hash(Bytes));
+    WriteToFile(Bytes, DstPath);
+    Env->Files.push_back(DstPath);
+  }
+  RmDirRecursive(Job->CorpusDir);  // Only after the accepted files are copied.
+  Env->Features.insert(FreshFeatures.begin(), FreshFeatures.end());
+  Printf("INFO: temp_files: %zd files_added: %zd newft: %zd ft: %zd\n",
+         JobOutputs.size(), Accepted.size(), FreshFeatures.size(),
+         Env->Features.size());
+}
+
 // This is just a skeleton of an experimental -fork=1 feature.
 void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
                   const Vector<std::string> &Args,
@@ -24,99 +83,78 @@
   Printf("INFO: -fork=1: doing fuzzing in a separate process in order to "
          "be more resistant to crashes, timeouts, and OOMs\n");
 
+  GlobalEnv Env;
+  Env.Args = &Args;
+
   Vector<SizedFile> SeedFiles;
   for (auto &Dir : CorpusDirs)
     GetSizedFilesFromDir(Dir, &SeedFiles);
   std::sort(SeedFiles.begin(), SeedFiles.end());
-  auto CFPath = TempPath(".fork");
-  auto LogPath = TempPath(".log");
-  auto TempDir = TempPath(".scratch_dir");
-  std::string MainCorpusDir;
-  if (CorpusDirs.empty())
-    MkDir(MainCorpusDir = TempPath(".corpus_dir"));
-  else
-    MainCorpusDir = CorpusDirs[0];
+  auto TempDir = TempPath(".dir");
+  RmDirRecursive(TempDir);  // just in case there is a leftover from an old run.
   MkDir(TempDir);
 
-  Vector<std::string> Files;
-  Set<uint32_t> Features;
-  if (!SeedFiles.empty()) {
-    CrashResistantMerge(Args, {}, SeedFiles, &Files, {}, &Features, CFPath,
-                        false);
-    RemoveFile(CFPath);
-  }
-  Printf("INFO: -fork=1: %zd seeds, starting to fuzz; scratch: %s\n",
-         Files.size(), TempDir.c_str());
+  auto CFPath = DirPlusFile(TempDir, "merge.txt");
+  auto LogPath = DirPlusFile(TempDir, "sub.log");
 
-  Command BaseCmd(Args);
+  if (CorpusDirs.empty())
+    MkDir(Env.MainCorpusDir = DirPlusFile(TempDir, "C"));
+  else
+    Env.MainCorpusDir = CorpusDirs[0];
+
+  auto TempCorpusDir = DirPlusFile(TempDir, "C0");
+
+  CrashResistantMerge(*Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
+                      CFPath, false);
+  RemoveFile(CFPath);
+  Printf("INFO: -fork=1: %zd seeds, starting to fuzz; scratch: %s\n",
+         Env.Files.size(), TempDir.c_str());
+
+  Command BaseCmd(*Env.Args);
   BaseCmd.removeFlag("fork");
   for (auto &C : CorpusDirs) // Remove all corpora from the args.
     BaseCmd.removeArgument(C);
-  BaseCmd.addArgument(TempDir);
-  BaseCmd.addFlag("len_control", "0");  // len_control is bad for short runs.
   BaseCmd.addFlag("reload", "0");  // working in an isolated dir, no reload.
   int ExitCode = 0;
-  int max_total_time = 1;
-  for (size_t i = 0; i < 1000000; i++) {
+
+
+  for (size_t i = 1; i < 1000000; i++) {
     // TODO: take new files from disk e.g. those generated by another process.
-    Command Cmd(BaseCmd);
-    if (size_t CorpusSubsetSize = std::min(Files.size(), (size_t)10)) {
-      std::string Seeds;
-      for (size_t i = 0; i < CorpusSubsetSize; i++) {
-        if (i) Seeds += ",";
-        Seeds += Files[Rand.SkewTowardsLast(Files.size())];
-      }
-      Cmd.addFlag("seed_inputs", Seeds);
-    }
-    if (Options.MaxTotalTimeSec > max_total_time)
-      max_total_time++;
-    if (!Cmd.hasFlag("max_total_time"))
-      Cmd.addFlag("max_total_time", std::to_string(max_total_time));
-    Cmd.setOutputFile(LogPath);
-    Cmd.combineOutAndErr();
-    RmFilesInDir(TempDir);
-    ExitCode = ExecuteCommand(Cmd);
-    // Printf("done [%d] %s\n", ExitCode, Cmd.toString().c_str());
-    if (ExitCode == Options.InterruptExitCode)
+
+    FuzzJob Job;
+    Job.Cmd = BaseCmd;
+    if (size_t CorpusSubsetSize = std::min(Env.Files.size(), (size_t)100))
+      for (size_t i = 0; i < CorpusSubsetSize; i++)
+        Job.Files.push_back(Env.Files[Rand.SkewTowardsLast(Env.Files.size())]);
+    Job.CorpusDir = TempCorpusDir;
+    Job.LogPath = LogPath;
+    Job.CFPath = CFPath;
+    // Start from very short runs and gradually increase them.
+    Job.MaxTotalTimeSec = std::min(300, (int)i);
+    RunOneFuzzingJob(&Job);
+
+    if (Options.Verbosity >= 2)
+      Printf("done [%d] %s\n", Job.ExitCode, Job.Cmd.toString().c_str());
+    if (Job.ExitCode == Options.InterruptExitCode)
       break;
-    Vector<SizedFile> TempFiles;
-    Vector<std::string>FilesToAdd;
-    Set<uint32_t> NewFeatures;
-    GetSizedFilesFromDir(TempDir, &TempFiles);
-    if (!TempFiles.empty())
-      CrashResistantMerge(Args, {}, TempFiles, &FilesToAdd, Features,
-                          &NewFeatures, CFPath, false);
-    RemoveFile(CFPath);
-    for (auto &Path : FilesToAdd) {
-      auto U = FileToVector(Path);
-      auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
-      WriteToFile(U, NewPath);
-      Files.push_back(NewPath);
-    }
-    Features.insert(NewFeatures.begin(), NewFeatures.end());
-    Printf("INFO: temp_files: %zd files_added: %zd newft: %zd ft: %zd\n",
-           TempFiles.size(), FilesToAdd.size(), NewFeatures.size(),
-           Features.size());
+
+    RunOneMergeJob(&Env, &Job);
+
     // Continue if our crash is one of the ignorred ones.
-    if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
+    if (Options.IgnoreTimeouts && Job.ExitCode == Options.TimeoutExitCode)
       continue;
-    if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
+    if (Options.IgnoreOOMs && Job.ExitCode == Options.OOMExitCode)
       continue;
     // And exit if we don't ignore this crash.
-    if (ExitCode != 0) {
+    if (Job.ExitCode != 0) {
       Printf("INFO: log from the inner process:\n%s",
              FileToString(LogPath).c_str());
+      ExitCode = Job.ExitCode;
       break;
     }
   }
 
-  RmFilesInDir(TempDir);
-  RmDir(TempDir);
-
-  if (CorpusDirs.empty()) {
-    RmFilesInDir(MainCorpusDir);
-    RmDir(MainCorpusDir);
-  }
+  RmDirRecursive(TempDir);
 
   // Use the exit code from the last child process.
   Printf("Fork: exiting: %d\n", ExitCode);
diff --git a/FuzzerIO.cpp b/FuzzerIO.cpp
index 91e1d20..33d6568 100644
--- a/FuzzerIO.cpp
+++ b/FuzzerIO.cpp
@@ -135,11 +135,11 @@
   fflush(OutputFile);
 }
 
-void RmFilesInDir(const std::string &Path) {
-  Vector<std::string> Files;
-  ListFilesInDirRecursive(Path, 0, &Files, /*TopDir*/true);
-  for (auto &F : Files)
-    RemoveFile(F);
+void RmDirRecursive(const std::string &Dir) {
+  IterateDirRecurisve(
+      Dir, [](const std::string &Path) {},  // Nothing to do on entering a dir.
+      [](const std::string &Path) { RmDir(Path); },  // Dir done: remove it.
+      [](const std::string &Path) { RemoveFile(Path); });
 }
 
 std::string TempPath(const char *Extension) {
diff --git a/FuzzerIO.h b/FuzzerIO.h
index 9d849be..588cf93 100644
--- a/FuzzerIO.h
+++ b/FuzzerIO.h
@@ -60,6 +60,16 @@
 void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
                              Vector<std::string> *V, bool TopDir);
 
+void RmDirRecursive(const std::string &Dir);  // Removes Dir with its contents.
+
+// Iterate files and dirs inside Dir (Dir itself included), recursively.
+// For every dir, DirPreCallback runs before its entries are visited and
+// DirPostCallback after; FileCallback runs on every file found.
+void IterateDirRecurisve(const std::string &Dir,
+                         void (*DirPreCallback)(const std::string &Dir),
+                         void (*DirPostCallback)(const std::string &Dir),
+                         void (*FileCallback)(const std::string &Dir));
+
 struct SizedFile {
   std::string File;
   size_t Size;
@@ -86,7 +96,6 @@
 
 void MkDir(const std::string &Path);
 void RmDir(const std::string &Path);
-void RmFilesInDir(const std::string &Path);
 
 }  // namespace fuzzer
 
diff --git a/FuzzerIOPosix.cpp b/FuzzerIOPosix.cpp
index 54c511e..93eaad6 100644
--- a/FuzzerIOPosix.cpp
+++ b/FuzzerIOPosix.cpp
@@ -78,6 +78,28 @@
     *Epoch = E;
 }
 
+// POSIX version. Directories whose name starts with '.' are not descended.
+void IterateDirRecurisve(const std::string &Dir,
+                         void (*DirPreCallback)(const std::string &Dir),
+                         void (*DirPostCallback)(const std::string &Dir),
+                         void (*FileCallback)(const std::string &Dir)) {
+  DirPreCallback(Dir);
+  DIR *D = opendir(Dir.c_str());
+  if (!D) return;  // Cannot open Dir: DirPostCallback is not invoked for it.
+  while (auto E = readdir(D)) {
+    std::string Path = DirPlusFile(Dir, E->d_name);
+    if (E->d_type == DT_REG || E->d_type == DT_LNK ||  // Links count as files.
+        (E->d_type == DT_UNKNOWN && IsFile(Path)))
+      FileCallback(Path);
+    else if ((E->d_type == DT_DIR ||
+             (E->d_type == DT_UNKNOWN && IsDirectory(Path))) &&
+             *E->d_name != '.')  // Skips "." and ".." (and any dot-dir).
+      IterateDirRecurisve(Path, DirPreCallback, DirPostCallback, FileCallback);
+  }
+  closedir(D);
+  DirPostCallback(Dir);
+}
+
 char GetSeparator() {
   return '/';
 }
diff --git a/FuzzerIOWindows.cpp b/FuzzerIOWindows.cpp
index bb33343..00256ca 100644
--- a/FuzzerIOWindows.cpp
+++ b/FuzzerIOWindows.cpp
@@ -140,6 +140,15 @@
     *Epoch = E;
 }
 
+// Windows: no callback is invoked yet, so RmDirRecursive removes nothing here.
+void IterateDirRecurisve(const std::string &Dir,
+                         void (*DirPreCallback)(const std::string &Dir),
+                         void (*DirPostCallback)(const std::string &Dir),
+                         void (*FileCallback)(const std::string &Dir)) {
+  // Unimplemented.
+  // TODO: implement, and then implement ListFilesInDirRecursive via this one.
+}
+
 char GetSeparator() {
   return '\\';
 }
diff --git a/FuzzerMerge.cpp b/FuzzerMerge.cpp
index 9a86512..556a231 100644
--- a/FuzzerMerge.cpp
+++ b/FuzzerMerge.cpp
@@ -261,6 +261,7 @@
                          const Set<uint32_t> &InitialFeatures,
                          Set<uint32_t> *NewFeatures, const std::string &CFPath,
                          bool V /*Verbose*/) {
+  if (NewCorpus.empty() && OldCorpus.empty()) return;  // Nothing to merge.
   size_t NumAttempts = 0;
   if (FileSize(CFPath)) {
     VPrintf(V, "MERGE-OUTER: non-empty control file provided: '%s'\n",