[libFuzzer] Add a command-line option for tracing mutation of corpus inputs in the dot graph format.

This patch adds a new command-line option -mutation_graph_file=FILE for
debugging purposes, which traces how corpus inputs evolve during a fuzzing
run. For each new input that is added to the corpus, a new vertex corresponding
to the added input, as well as a new edge that connects its base input to itself
are written to the given file. Each vertex is labeled with the filename of the
input, and each edge is labeled with the mutation sequence that led to the input
w.r.t. its base input.

The format of the mutation graph file is the dot file format. Once prepended and
appended with "graph {" and "}", respectively, the graph becomes a valid dot
file and can be visualized.

Differential Revision: https://reviews.llvm.org/D86560

GitOrigin-RevId: 1bb1eac6b177739429e78703b265e7546792fd64
diff --git a/FuzzerDriver.cpp b/FuzzerDriver.cpp
index caafd1d..57df123 100644
--- a/FuzzerDriver.cpp
+++ b/FuzzerDriver.cpp
@@ -755,6 +755,8 @@
     Options.FeaturesDir = Flags.features_dir;
     ValidateDirectoryExists(Options.FeaturesDir, Flags.create_missing_dirs);
   }
+  if (Flags.mutation_graph_file)
+    Options.MutationGraphFile = Flags.mutation_graph_file;
   if (Flags.collect_data_flow)
     Options.CollectDataFlow = Flags.collect_data_flow;
   if (Flags.stop_file)
diff --git a/FuzzerFlags.def b/FuzzerFlags.def
index fdb8362..c9a787e 100644
--- a/FuzzerFlags.def
+++ b/FuzzerFlags.def
@@ -88,6 +88,11 @@
   "Every time a new input is added to the corpus, a corresponding file in the features_dir"
   " is created containing the unique features of that input."
   " Features are stored in binary format.")
+FUZZER_FLAG_STRING(mutation_graph_file, "Saves a graph (in DOT format) to"
+  " mutation_graph_file. The graph contains a vertex for each input that has"
+  " unique coverage; directed edges are provided between parents and children"
+  " where the child has unique coverage, and are recorded with the type of"
+  " mutation that caused the child.")
 FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters")
 FUZZER_FLAG_INT(use_memmem, 1,
                 "Use hints from intercepting memmem, strstr, etc")
diff --git a/FuzzerIO.cpp b/FuzzerIO.cpp
index c3330c3..54a7219 100644
--- a/FuzzerIO.cpp
+++ b/FuzzerIO.cpp
@@ -77,6 +77,19 @@
   fclose(Out);
 }
 
+void AppendToFile(const std::string &Data, const std::string &Path) {
+  AppendToFile(reinterpret_cast<const uint8_t *>(Data.data()), Data.size(),
+               Path);
+}
+
+void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path) {
+  FILE *Out = fopen(Path.c_str(), "a");
+  if (!Out)
+    return;
+  fwrite(Data, sizeof(Data[0]), Size, Out);
+  fclose(Out);
+}
+
 void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V,
                             long *Epoch, size_t MaxSize, bool ExitOnError) {
   long E = Epoch ? *Epoch : 0;
diff --git a/FuzzerIO.h b/FuzzerIO.h
index 6e3a0b4..abd2511 100644
--- a/FuzzerIO.h
+++ b/FuzzerIO.h
@@ -29,6 +29,9 @@
 void WriteToFile(const std::string &Data, const std::string &Path);
 void WriteToFile(const Unit &U, const std::string &Path);
 
+void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path);
+void AppendToFile(const std::string &Data, const std::string &Path);
+
 void ReadDirToVectorOfUnits(const char *Path, Vector<Unit> *V,
                             long *Epoch, size_t MaxSize, bool ExitOnError);
 
diff --git a/FuzzerLoop.cpp b/FuzzerLoop.cpp
index f9986dd..ce8c2fb 100644
--- a/FuzzerLoop.cpp
+++ b/FuzzerLoop.cpp
@@ -463,6 +463,37 @@
              DirPlusFile(FeaturesDir, NewFile));
 }
 
+static void WriteEdgeToMutationGraphFile(const std::string &MutationGraphFile,
+                                         const InputInfo *II,
+                                         const InputInfo *BaseII,
+                                         const std::string &MS) {
+  if (MutationGraphFile.empty())
+    return;
+
+  std::string Sha1 = Sha1ToString(II->Sha1);
+
+  std::string OutputString;
+
+  // Add a new vertex.
+  OutputString.append("\"");
+  OutputString.append(Sha1);
+  OutputString.append("\"\n");
+
+  // Add a new edge if there is base input.
+  if (BaseII) {
+    std::string BaseSha1 = Sha1ToString(BaseII->Sha1);
+    OutputString.append("\"");
+    OutputString.append(BaseSha1);
+    OutputString.append("\" -> \"");
+    OutputString.append(Sha1);
+    OutputString.append("\" [label=\"");
+    OutputString.append(MS);
+    OutputString.append("\"];\n");
+  }
+
+  AppendToFile(OutputString, MutationGraphFile);
+}
+
 bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
                     InputInfo *II, bool ForceAddToCorpus,
                     bool *FoundUniqFeatures) {
@@ -497,6 +528,8 @@
                            TimeOfUnit, UniqFeatureSetTmp, DFT, II);
     WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
                           NewII->UniqFeatureSet);
+    WriteEdgeToMutationGraphFile(Options.MutationGraphFile, NewII, II,
+                                 MD.MutationSequence());
     return true;
   }
   if (II && FoundUniqFeaturesOfII &&
diff --git a/FuzzerMutate.cpp b/FuzzerMutate.cpp
index df9ada4..121b450 100644
--- a/FuzzerMutate.cpp
+++ b/FuzzerMutate.cpp
@@ -494,6 +494,15 @@
   }
 }
 
+std::string MutationDispatcher::MutationSequence() {
+  std::string MS;
+  for (auto M : CurrentMutatorSequence) {
+    MS += M.Name;
+    MS += "-";
+  }
+  return MS;
+}
+
 size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) {
   return MutateImpl(Data, Size, MaxSize, Mutators);
 }
diff --git a/FuzzerMutate.h b/FuzzerMutate.h
index 6cbce80..3ce3159 100644
--- a/FuzzerMutate.h
+++ b/FuzzerMutate.h
@@ -26,6 +26,8 @@
   void StartMutationSequence();
   /// Print the current sequence of mutations.
   void PrintMutationSequence();
+  /// Return the current sequence of mutations.
+  std::string MutationSequence();
   /// Indicate that the current sequence of mutations was successful.
   void RecordSuccessfulMutationSequence();
   /// Mutates data by invoking user-provided mutator.
diff --git a/FuzzerOptions.h b/FuzzerOptions.h
index b17a747..706e1c6 100644
--- a/FuzzerOptions.h
+++ b/FuzzerOptions.h
@@ -59,6 +59,7 @@
   std::string DataFlowTrace;
   std::string CollectDataFlow;
   std::string FeaturesDir;
+  std::string MutationGraphFile;
   std::string StopFile;
   bool SaveArtifacts = true;
   bool PrintNEW = true; // Print a status line when new units are found;