[libFuzzer] refactor the merging code, NFC

llvm-svn: 353576
GitOrigin-RevId: 114cfafe0585edbdcffbfab2cb32d206f5f02ac7
diff --git a/FuzzerDriver.cpp b/FuzzerDriver.cpp
index a0c9f18..f4e3c39 100644
--- a/FuzzerDriver.cpp
+++ b/FuzzerDriver.cpp
@@ -471,14 +471,21 @@
   return 0;
 }
 
-// This is just a sceleton of an experimental -fork=1 feature.
+// This is just a skeleton of an experimental -fork=1 feature.
 void FuzzWithFork(const FuzzingOptions &Options,
                   const Vector<std::string> &Args,
                   const Vector<std::string> &Corpora) {
   auto CFPath = TempPath(".fork");
   Printf("INFO: -fork=1: doing fuzzing in a separate process in order to "
          "be more resistant to crashes, timeouts, and OOMs\n");
-  auto Files = CrashResistantMerge(Args, Corpora, CFPath);
+
+
+  Vector<SizedFile> Corpus;
+  for (auto &Dir : Corpora)
+    GetSizedFilesFromDir(Dir, &Corpus);
+  std::sort(Corpus.begin(), Corpus.end());
+
+  auto Files = CrashResistantMerge(Args, {}, Corpus, CFPath);
   Printf("INFO: -fork=1: seed corpus analyzed, %zd seeds chosen, starting to "
          "fuzz in separate processes\n", Files.size());
 
@@ -500,6 +507,31 @@
   exit(0);
 }
 
+// Merges corpus dirs, then exits. Corpora[0] is the old corpus, the rest are
+// new ones. A null CFPathOrNull selects a temporary control file.
+void Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args,
+           const Vector<std::string> &Corpora, const char *CFPathOrNull) {
+  if (Corpora.size() < 2) {
+    Printf("INFO: Merge requires two or more corpus dirs\n");
+    exit(0);
+  }
+  Vector<SizedFile> OldCorpus, NewCorpus;
+  GetSizedFilesFromDir(Corpora[0], &OldCorpus);
+  for (size_t i = 1; i < Corpora.size(); i++)
+    GetSizedFilesFromDir(Corpora[i], &NewCorpus);
+  std::sort(OldCorpus.begin(), OldCorpus.end());
+  std::sort(NewCorpus.begin(), NewCorpus.end());
+
+  std::string CFPath = CFPathOrNull ? CFPathOrNull : TempPath(".txt");
+  auto Files = CrashResistantMerge(Args, OldCorpus, NewCorpus, CFPath);
+  for (auto &Path : Files)
+    F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen));
+  // We are done, delete the control file if it was a temporary one.
+  if (!CFPathOrNull)
+    RemoveFile(CFPath);
+  exit(0);
+}
+
 int AnalyzeDictionary(Fuzzer *F, const Vector<Unit>& Dict,
                       UnitVector& Corpus) {
   Printf("Started dictionary minimization (up to %d tests)\n",
@@ -730,22 +762,8 @@
   if (Flags.fork)
     FuzzWithFork(Options, Args, *Inputs);
 
-  if (Flags.merge) {
-    if (Inputs->size() < 2) {
-      Printf("INFO: Merge requires two or more corpus dirs\n");
-      exit(0);
-    }
-    std::string CFPath =
-        Flags.merge_control_file ? Flags.merge_control_file : TempPath(".txt");
-    auto Files = CrashResistantMerge(Args, *Inputs, CFPath);
-    for (auto &Path : Files)
-      F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen));
-    // We are done, delete the control file if it was a temporary one.
-    if (!Flags.merge_control_file)
-      RemoveFile(CFPath);
-
-    exit(0);
-  }
+  if (Flags.merge)
+    Merge(F, Options, Args, *Inputs, Flags.merge_control_file);
 
   if (Flags.merge_inner) {
     const size_t kDefaultMaxMergeLen = 1 << 20;
diff --git a/FuzzerMerge.cpp b/FuzzerMerge.cpp
index 0d4971a..5c59026 100644
--- a/FuzzerMerge.cpp
+++ b/FuzzerMerge.cpp
@@ -230,13 +230,15 @@
 }
 
 static void WriteNewControlFile(const std::string &CFPath,
-                                const Vector<SizedFile> &AllFiles,
-                                size_t NumFilesInFirstCorpus) {
+                                const Vector<SizedFile> &OldCorpus,
+                                const Vector<SizedFile> &NewCorpus) {
   RemoveFile(CFPath);
   std::ofstream ControlFile(CFPath);
-  ControlFile << AllFiles.size() << "\n";
-  ControlFile << NumFilesInFirstCorpus << "\n";
-  for (auto &SF: AllFiles)
+  ControlFile << (OldCorpus.size() + NewCorpus.size()) << "\n";
+  ControlFile << OldCorpus.size() << "\n";
+  for (auto &SF: OldCorpus)
+    ControlFile << SF.File << "\n";
+  for (auto &SF: NewCorpus)
     ControlFile << SF.File << "\n";
   if (!ControlFile) {
     Printf("MERGE-OUTER: failed to write to the control file: %s\n",
@@ -245,10 +247,11 @@
   }
 }
 
-// Outer process. Does not call the target code and thus sohuld not fail.
+// Outer process. Does not call the target code and thus should not fail.
 Vector<std::string>
 CrashResistantMerge(const Vector<std::string> &Args,
-                    const Vector<std::string> &Corpora,
+                    const Vector<SizedFile> &OldCorpus,
+                    const Vector<SizedFile> &NewCorpus,
                     const std::string &CFPath) {
   size_t NumAttempts = 0;
   if (FileSize(CFPath)) {
@@ -277,17 +280,10 @@
 
   if (!NumAttempts) {
     // The supplied control file is empty or bad, create a fresh one.
-    Vector<SizedFile> AllFiles;
-    GetSizedFilesFromDir(Corpora[0], &AllFiles);
-    size_t NumFilesInFirstCorpus = AllFiles.size();
-    std::sort(AllFiles.begin(), AllFiles.end());
-    for (size_t i = 1; i < Corpora.size(); i++)
-      GetSizedFilesFromDir(Corpora[i], &AllFiles);
-    std::sort(AllFiles.begin() + NumFilesInFirstCorpus, AllFiles.end());
-    Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n",
-           AllFiles.size(), NumFilesInFirstCorpus);
-    WriteNewControlFile(CFPath, AllFiles, NumFilesInFirstCorpus);
-    NumAttempts = AllFiles.size();
+    NumAttempts = OldCorpus.size() + NewCorpus.size();
+    Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n", NumAttempts,
+           OldCorpus.size());
+    WriteNewControlFile(CFPath, OldCorpus, NewCorpus);
   }
 
   // Execute the inner process until it passes.
diff --git a/FuzzerMerge.h b/FuzzerMerge.h
index 273f486..0d35155 100644
--- a/FuzzerMerge.h
+++ b/FuzzerMerge.h
@@ -71,7 +71,8 @@
 
 Vector<std::string>
 CrashResistantMerge(const Vector<std::string> &Args,
-                    const Vector<std::string> &Corpora,
+                    const Vector<SizedFile> &OldCorpus,
+                    const Vector<SizedFile> &NewCorpus,
                     const std::string &CFPath);
 
 }  // namespace fuzzer