[libFuzzer] extend the -fork=1 functionality. Still not fully usable, but good enough for the first unit test

llvm-svn: 353775
GitOrigin-RevId: 63f48717b58d76523fdbfdba6fa1e2c10ae9e3da
diff --git a/FuzzerDriver.cpp b/FuzzerDriver.cpp
index c381fb8..232b3a8 100644
--- a/FuzzerDriver.cpp
+++ b/FuzzerDriver.cpp
@@ -472,46 +472,76 @@
 }
 
 // This is just a skeleton of an experimental -fork=1 feature.
-void FuzzWithFork(const FuzzingOptions &Options,
+void FuzzWithFork(Fuzzer *F, const FuzzingOptions &Options,
                   const Vector<std::string> &Args,
                   const Vector<std::string> &Corpora) {
   Printf("INFO: -fork=1: doing fuzzing in a separate process in order to "
          "be more resistant to crashes, timeouts, and OOMs\n");
+  auto Rand = F->GetMD().GetRand();  // Picks the seed_inputs for each run.
 
   Vector<SizedFile> Corpus;
   for (auto &Dir : Corpora)
     GetSizedFilesFromDir(Dir, &Corpus);
   std::sort(Corpus.begin(), Corpus.end());
+  auto CFPath = TempPath(".fork");  // Handed to each merge, removed after it.
 
   Vector<std::string> Files;
   Set<uint32_t> Features;
   if (!Corpus.empty()) {
-    auto CFPath = TempPath(".fork");
     CrashResistantMerge(Args, {}, Corpus, &Files, {}, &Features, CFPath);
     RemoveFile(CFPath);
   }
-  Printf("INFO: -fork=1: %zd seeds, starting to fuzz\n", Files.size());
+  auto TempDir = TempPath("Dir");  // Scratch dir that each child run gets.
+  MkDir(TempDir);
+  Printf("INFO: -fork=1: %zd seeds, starting to fuzz; scratch: %s\n",
+         Files.size(), TempDir.c_str());
 
-  Command Cmd(Args);
-  Cmd.removeFlag("fork");
+  Command BaseCmd(Args);
+  BaseCmd.removeFlag("fork");
   for (auto &C : Corpora) // Remove all corpora from the args.
-    Cmd.removeArgument(C);
-  if (Files.size() >= 2)
-    Cmd.addFlag("seed_inputs",
-                Files.back() + "," + Files[Files.size() - 2]);
-  Cmd.addFlag("runs", "1000000");
-  Cmd.addFlag("max_total_time", "30");
-  for (size_t i = 0; i < 1000; i++) {
+    BaseCmd.removeArgument(C);
+  BaseCmd.addFlag("runs", "1000000");
+  BaseCmd.addFlag("max_total_time", "30");
+  BaseCmd.addArgument(TempDir);  // Stands in for the removed corpora.
+  int ExitCode = 0;  // Most recent child's code; passed to exit() below.
+  for (size_t i = 0; i < 1000000; i++) {
+    // TODO: take new files from disk e.g. those generated by another process.
+    Command Cmd(BaseCmd);
+    if (Files.size() >= 2)
+      Cmd.addFlag("seed_inputs",
+                  Files[Rand.SkewTowardsLast(Files.size())] + "," +
+                      Files[Rand.SkewTowardsLast(Files.size())]);
     Printf("RUN %s\n", Cmd.toString().c_str());
-    int ExitCode = ExecuteCommand(Cmd);
+    RmFilesInDir(TempDir);  // Empty the scratch dir before every run.
+    ExitCode = ExecuteCommand(Cmd);
+    Printf("Exit code: %d\n", ExitCode);
     if (ExitCode == Options.InterruptExitCode)
-      exit(0);
-    if (ExitCode == Options.TimeoutExitCode || ExitCode == Options.OOMExitCode)
-      continue;
+      break;  // No merge for this run; cleanup and exit() follow the loop.
+    Vector<SizedFile> TempFiles;
+    Vector<std::string>FilesToAdd;
+    Set<uint32_t> NewFeatures;
+    GetSizedFilesFromDir(TempDir, &TempFiles);  // Files present after the run.
+    CrashResistantMerge(Args, {}, TempFiles, &FilesToAdd, Features,
+                        &NewFeatures, CFPath);
+    RemoveFile(CFPath);
+    for (auto &Path : FilesToAdd) {
+      auto NewPath = F->WriteToOutputCorpus(FileToVector(Path, Options.MaxLen));
+      if (!NewPath.empty())  // "" means no output corpus is configured.
+        Files.push_back(NewPath);  // Now a seed_inputs candidate.
+    }
+    Features.insert(NewFeatures.begin(), NewFeatures.end());
+    Printf("INFO: temp_files: %zd files_added: %zd newft: %zd ft: %zd\n",
+           TempFiles.size(), FilesToAdd.size(), NewFeatures.size(),
+           Features.size());
     if (ExitCode != 0) break;
   }
 
-  exit(0);
+  RmFilesInDir(TempDir);
+  RmDir(TempDir);
+
+  // Use the exit code from the last child process.
+  Printf("Fork: exiting: %d\n", ExitCode);
+  exit(ExitCode);
 }
 
 void Merge(Fuzzer *F, FuzzingOptions &Options, const Vector<std::string> &Args,
@@ -770,7 +800,7 @@
   }
 
   if (Flags.fork)
-    FuzzWithFork(Options, Args, *Inputs);
+    FuzzWithFork(F, Options, Args, *Inputs);
 
   if (Flags.merge)
     Merge(F, Options, Args, *Inputs, Flags.merge_control_file);
diff --git a/FuzzerFlags.def b/FuzzerFlags.def
index 3296551..caf541b 100644
--- a/FuzzerFlags.def
+++ b/FuzzerFlags.def
@@ -36,7 +36,7 @@
     "If one unit runs more than this number of seconds the process will abort.")
 FUZZER_FLAG_INT(error_exitcode, 77, "When libFuzzer itself reports a bug "
   "this exit code will be used.")
-FUZZER_FLAG_INT(timeout_exitcode, 77, "When libFuzzer reports a timeout "
+FUZZER_FLAG_INT(timeout_exitcode, 70, "When libFuzzer reports a timeout "
   "this exit code will be used.")
 FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total "
                                    "time in seconds to run the fuzzer.")
diff --git a/FuzzerIO.cpp b/FuzzerIO.cpp
index 1ff2293..a18fba7 100644
--- a/FuzzerIO.cpp
+++ b/FuzzerIO.cpp
@@ -125,4 +125,11 @@
   fflush(OutputFile);
 }
 
+// Removes every file that ListFilesInDirRecursive reports for Path.
+void RmFilesInDir(const std::string &Path) {
+  Vector<std::string> Doomed;
+  ListFilesInDirRecursive(Path, 0, &Doomed, /*TopDir*/true);
+  for (auto &File : Doomed) RemoveFile(File);
+}
+
 }  // namespace fuzzer
diff --git a/FuzzerIO.h b/FuzzerIO.h
index c3bc608..b31aea9 100644
--- a/FuzzerIO.h
+++ b/FuzzerIO.h
@@ -81,6 +81,10 @@
 
 intptr_t GetHandleFromFd(int fd);
 
+void MkDir(const std::string &Path);  // Creates dir Path (Windows: stub).
+void RmDir(const std::string &Path);  // Removes dir Path (Windows: stub).
+void RmFilesInDir(const std::string &Path);  // Removes files found under Path.
+
 }  // namespace fuzzer
 
 #endif  // LLVM_FUZZER_IO_H
diff --git a/FuzzerIOPosix.cpp b/FuzzerIOPosix.cpp
index 0daed83..54c511e 100644
--- a/FuzzerIOPosix.cpp
+++ b/FuzzerIOPosix.cpp
@@ -136,11 +136,18 @@
   return true;
 }
 
-
 void RawPrint(const char *Str) {
   write(2, Str, strlen(Str));
 }
 
+void MkDir(const std::string &Path) {  // Creates Path (0700); logs on failure.
+  if (mkdir(Path.c_str(), 0700)) Printf("MkDir failed: %s\n", Path.c_str());
+}
+
+void RmDir(const std::string &Path) {  // Removes empty dir Path; logs failure.
+  if (rmdir(Path.c_str())) Printf("RmDir failed: %s\n", Path.c_str());
+}
+
 }  // namespace fuzzer
 
 #endif // LIBFUZZER_POSIX
diff --git a/FuzzerIOWindows.cpp b/FuzzerIOWindows.cpp
index e45c137..bb33343 100644
--- a/FuzzerIOWindows.cpp
+++ b/FuzzerIOWindows.cpp
@@ -336,6 +336,14 @@
   _write(2, Str, strlen(Str));
 }
 
+void MkDir(const std::string &Path) {  // Stub: Path is ignored; no dir is made.
+  Printf("MkDir: unimplemented\n");
+}
+
+void RmDir(const std::string &Path) {  // Stub: Path is ignored; nothing removed.
+  Printf("RmDir: unimplemented\n");
+}
+
 }  // namespace fuzzer
 
 #endif // LIBFUZZER_WINDOWS
diff --git a/FuzzerInternal.h b/FuzzerInternal.h
index ca4cdf8..f20dae0 100644
--- a/FuzzerInternal.h
+++ b/FuzzerInternal.h
@@ -87,7 +87,7 @@
 
   void HandleMalloc(size_t Size);
   static void MaybeExitGracefully();
-  void WriteToOutputCorpus(const Unit &U);
+  std::string WriteToOutputCorpus(const Unit &U);
 
 private:
   void AlarmCallback();
diff --git a/FuzzerLoop.cpp b/FuzzerLoop.cpp
index c3be3dd..5cd0cdd 100644
--- a/FuzzerLoop.cpp
+++ b/FuzzerLoop.cpp
@@ -537,15 +537,16 @@
   delete[] DataCopy;
 }
 
-void Fuzzer::WriteToOutputCorpus(const Unit &U) {
+std::string Fuzzer::WriteToOutputCorpus(const Unit &U) {
   if (Options.OnlyASCII)
     assert(IsASCII(U));
   if (Options.OutputCorpus.empty())
-    return;
+    return "";  // No output corpus configured, so nothing is written.
   std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U));
   WriteToFile(U, Path);
   if (Options.Verbosity >= 2)
     Printf("Written %zd bytes to %s\n", U.size(), Path.c_str());
+  return Path;  // The path that was passed to WriteToFile above.
 }
 
 void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) {
diff --git a/FuzzerMerge.cpp b/FuzzerMerge.cpp
index 9760ac2..4d00f7e 100644
--- a/FuzzerMerge.cpp
+++ b/FuzzerMerge.cpp
@@ -120,28 +120,28 @@
   return Res;
 }
 
-// Decides which files need to be merged (add thost to NewFiles).
+// Decides which files need to be merged (add those to NewFiles).
 // Returns the number of new features added.
 size_t Merger::Merge(const Set<uint32_t> &InitialFeatures,
-                     Set<uint32_t> *AllFeatures,
+                     Set<uint32_t> *NewFeatures,
                      Vector<std::string> *NewFiles) {
   NewFiles->clear();
   assert(NumFilesInFirstCorpus <= Files.size());
-  *AllFeatures = InitialFeatures;
+  Set<uint32_t> AllFeatures = InitialFeatures;
 
   // What features are in the initial corpus?
   for (size_t i = 0; i < NumFilesInFirstCorpus; i++) {
     auto &Cur = Files[i].Features;
-    AllFeatures->insert(Cur.begin(), Cur.end());
+    AllFeatures.insert(Cur.begin(), Cur.end());
   }
-  size_t InitialNumFeatures = AllFeatures->size();
+  size_t InitialNumFeatures = AllFeatures.size();
 
   // Remove all features that we already know from all other inputs.
   for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) {
     auto &Cur = Files[i].Features;
     Vector<uint32_t> Tmp;
-    std::set_difference(Cur.begin(), Cur.end(), AllFeatures->begin(),
-                        AllFeatures->end(), std::inserter(Tmp, Tmp.begin()));
+    std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(),
+                        AllFeatures.end(), std::inserter(Tmp, Tmp.begin()));
     Cur.swap(Tmp);
   }
 
@@ -161,12 +161,17 @@
     auto &Cur = Files[i].Features;
     // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(),
     //       Files[i].Size, Cur.size());
-    size_t OldSize = AllFeatures->size();
-    AllFeatures->insert(Cur.begin(), Cur.end());
-    if (AllFeatures->size() > OldSize)
+    bool FoundNewFeatures = false;
+    for (auto Fe: Cur) {
+      if (AllFeatures.insert(Fe).second) {
+        FoundNewFeatures = true;
+        NewFeatures->insert(Fe);
+      }
+    }
+    if (FoundNewFeatures)
       NewFiles->push_back(Files[i].Name);
   }
-  return AllFeatures->size() - InitialNumFeatures;
+  return AllFeatures.size() - InitialNumFeatures;
 }
 
 Set<uint32_t> Merger::AllFeatures() const {
diff --git a/FuzzerRandom.h b/FuzzerRandom.h
index f475797..af8e1a4 100644
--- a/FuzzerRandom.h
+++ b/FuzzerRandom.h
@@ -20,6 +20,11 @@
   result_type operator()() { return this->std::mt19937::operator()(); }
   size_t Rand() { return this->operator()(); }
   size_t RandBool() { return Rand() % 2; }
+  // Index in [0, n) skewed towards n - 1: sqrt of a draw from [0, n * n).
+  size_t SkewTowardsLast(size_t n) {
+    size_t Square = this->operator()(n * n);
+    return static_cast<size_t>(sqrt(Square));
+  }
   size_t operator()(size_t n) { return n ? Rand() % n : 0; }
   intptr_t operator()(intptr_t From, intptr_t To) {
     assert(From < To);
diff --git a/FuzzerUtilLinux.cpp b/FuzzerUtilLinux.cpp
index 640f3d9..d5a15d1 100644
--- a/FuzzerUtilLinux.cpp
+++ b/FuzzerUtilLinux.cpp
@@ -13,12 +13,18 @@
 #include "FuzzerCommand.h"
 
 #include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
 
 namespace fuzzer {
 
 int ExecuteCommand(const Command &Cmd) {
   std::string CmdLine = Cmd.toString();
-  return system(CmdLine.c_str());
+  // system() returns a wait status: unwrap it if the child exited normally,
+  // otherwise (e.g. killed by a signal) hand back the raw status unchanged.
+  int Status = system(CmdLine.c_str());
+  return WIFEXITED(Status) ? WEXITSTATUS(Status) : Status;
 }
 
 } // namespace fuzzer