[libFuzzer] Initial implementation of weighted mutation leveraging during runtime.

Summary:
Added functions that calculate stats while fuzz targets are running and give
mutations weight based on how much new coverage they provide, and choose better
performing mutations more often.

Patch by Kodé Williams (@kodewilliams).

Reviewers: Dor1s, metzman, morehouse

Reviewed By: Dor1s, morehouse

Subscribers: delcypher, kcc, llvm-commits, #sanitizers

Differential Revision: https://reviews.llvm.org/D49621

llvm-svn: 338776
GitOrigin-RevId: 5a9baa330caf473b9b1fd7c256258411932fba4c
diff --git a/FuzzerDriver.cpp b/FuzzerDriver.cpp
index 783474a..d11f9a6 100644
--- a/FuzzerDriver.cpp
+++ b/FuzzerDriver.cpp
@@ -616,6 +616,7 @@
   Options.PrintNewCovFuncs = Flags.print_funcs;
   Options.PrintFinalStats = Flags.print_final_stats;
   Options.PrintMutationStats = Flags.print_mutation_stats;
+  Options.UseWeightedMutations = Flags.use_weighted_mutations;
   Options.PrintCorpusStats = Flags.print_corpus_stats;
   Options.PrintCoverage = Flags.print_coverage;
   Options.PrintUnstableStats = Flags.print_unstable_stats;
diff --git a/FuzzerFlags.def b/FuzzerFlags.def
index ba04bc2..258427c 100644
--- a/FuzzerFlags.def
+++ b/FuzzerFlags.def
@@ -163,3 +163,5 @@
 FUZZER_DEPRECATED_FLAG(use_clang_coverage)
 FUZZER_FLAG_STRING(data_flow_trace, "Experimental: use the data flow trace")
 FUZZER_FLAG_INT(print_mutation_stats, 0, "Experimental")
+FUZZER_FLAG_INT(use_weighted_mutations, 0, "Experimental: If 1, fuzzing will "
+    "favor mutations that perform better during runtime.")
diff --git a/FuzzerLoop.cpp b/FuzzerLoop.cpp
index 4bc8836..23fcb8a 100644
--- a/FuzzerLoop.cpp
+++ b/FuzzerLoop.cpp
@@ -38,6 +38,7 @@
 
 namespace fuzzer {
 static const size_t kMaxUnitSizeToPrint = 256;
+static const size_t kUpdateMutationWeightRuns = 10000;
 
 thread_local bool Fuzzer::IsMyThread;
 
@@ -554,6 +555,9 @@
 
 void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) {
   TPC.RecordInitialStack();
+  if (Options.UseWeightedMutations &&
+      TotalNumberOfRuns % kUpdateMutationWeightRuns == 0)
+    MD.UpdateDistribution();
   TotalNumberOfRuns++;
   assert(InFuzzingThread());
   if (SMR.IsClient())
diff --git a/FuzzerMutate.cpp b/FuzzerMutate.cpp
index ff076cc..fac3c7a 100644
--- a/FuzzerMutate.cpp
+++ b/FuzzerMutate.cpp
@@ -30,36 +30,41 @@
   DefaultMutators.insert(
       DefaultMutators.begin(),
       {
-          {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes", 0, 0},
-          {&MutationDispatcher::Mutate_InsertByte, "InsertByte", 0, 0},
+          // Initialize useful and total mutation counts as 1 in order to
+          // have mutation stats (i.e. weights) with equal non-zero values.
+          {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes", 1, 1},
+          {&MutationDispatcher::Mutate_InsertByte, "InsertByte", 1, 1},
           {&MutationDispatcher::Mutate_InsertRepeatedBytes,
-           "InsertRepeatedBytes", 0, 0},
-          {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte", 0, 0},
-          {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit", 0, 0},
-          {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes", 0, 0},
-          {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt", 0,
-           0},
-          {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt", 0,
-           0},
-          {&MutationDispatcher::Mutate_CopyPart, "CopyPart", 0, 0},
-          {&MutationDispatcher::Mutate_CrossOver, "CrossOver", 0, 0},
+           "InsertRepeatedBytes", 1, 1},
+          {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte", 1, 1},
+          {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit", 1, 1},
+          {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes", 1, 1},
+          {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt", 1,
+           1},
+          {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt", 1,
+           1},
+          {&MutationDispatcher::Mutate_CopyPart, "CopyPart", 1, 1},
+          {&MutationDispatcher::Mutate_CrossOver, "CrossOver", 1, 1},
           {&MutationDispatcher::Mutate_AddWordFromManualDictionary,
-           "ManualDict", 0, 0},
+           "ManualDict", 1, 1},
           {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary,
-           "PersAutoDict", 0, 0},
+           "PersAutoDict", 1, 1},
       });
   if(Options.UseCmp)
     DefaultMutators.push_back(
-        {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP", 0, 0});
+        {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP", 1, 1});
 
   if (EF->LLVMFuzzerCustomMutator)
-    Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom", 0, 0});
+    Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom", 1, 1});
   else
     Mutators = DefaultMutators;
 
   if (EF->LLVMFuzzerCustomCrossOver)
     Mutators.push_back(
-        {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver", 0, 0});
+        {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver", 1, 1});
+
+  // For weighted mutation selection, init with uniform weights distribution.
+  Stats.resize(Mutators.size());
 }
 
 static char RandCh(Random &Rand) {
@@ -514,8 +519,14 @@
   // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize),
   // in which case they will return 0.
   // Try several times before returning un-mutated data.
+  Mutator *M = nullptr;
   for (int Iter = 0; Iter < 100; Iter++) {
-    auto M = &Mutators[Rand(Mutators.size())];
+    // Even when using weighted mutations, fallback to the default selection in
+    // 20% of cases.
+    if (Options.UseWeightedMutations && Rand(5))
+      M = &Mutators[WeightedIndex()];
+    else
+      M = &Mutators[Rand(Mutators.size())];
     size_t NewSize = (this->*(M->Fn))(Data, Size, MaxSize);
     if (NewSize && NewSize <= MaxSize) {
       if (Options.OnlyASCII)
@@ -568,15 +579,28 @@
 
 void MutationDispatcher::PrintMutationStats() {
   Printf("\nstat::mutation_usefulness:      ");
-  for (size_t i = 0; i < Mutators.size(); i++) {
-    double UsefulPercentage =
-        Mutators[i].TotalCount
-            ? (100.0 * Mutators[i].UsefulCount) / Mutators[i].TotalCount
-            : 0;
-    Printf("%.3f", UsefulPercentage);
-    if (i < Mutators.size() - 1) Printf(",");
+  UpdateMutationStats();
+  for (size_t i = 0; i < Stats.size(); i++) {
+    Printf("%.3f", 100 * Stats[i]);
+    if (i < Stats.size() - 1)
+      Printf(",");
+    else
+      Printf("\n");
   }
-  Printf("\n");
 }
 
+void MutationDispatcher::UpdateMutationStats() {
+  // Calculate usefulness statistic for each mutation
+  for (size_t i = 0; i < Stats.size(); i++)
+    Stats[i] =
+        static_cast<double>(Mutators[i].UsefulCount) / Mutators[i].TotalCount;
+}
+
+void MutationDispatcher::UpdateDistribution() {
+  UpdateMutationStats();
+  Distribution = std::discrete_distribution<size_t>(Stats.begin(), Stats.end());
+}
+
+size_t MutationDispatcher::WeightedIndex() { return Distribution(GetRand()); }
+
 }  // namespace fuzzer
diff --git a/FuzzerMutate.h b/FuzzerMutate.h
index 828ecc1..d89667c 100644
--- a/FuzzerMutate.h
+++ b/FuzzerMutate.h
@@ -93,9 +93,22 @@
 
   Random &GetRand() { return Rand; }
 
+  /// Records tally of mutations resulting in new coverage, for usefulness
+  /// metric.
+  void RecordUsefulMutations();
+
+  /// Outputs usefulness stats on command line if option is enabled.
   void PrintMutationStats();
 
-  void RecordUsefulMutations();
+  /// Recalculates mutation stats based on latest run data.
+  void UpdateMutationStats();
+
+  /// Sets weights based on mutation performance during fuzzer run.
+  void UpdateDistribution();
+
+  /// Returns the index of a mutation based on how useful it has been.
+  /// Favors mutations with higher usefulness ratios but can return any index.
+  size_t WeightedIndex();
 
  private:
   struct Mutator {
@@ -156,6 +169,10 @@
 
   Vector<Mutator> Mutators;
   Vector<Mutator> DefaultMutators;
+
+  // Used to weight mutations based on usefulness.
+  Vector<double> Stats;
+  std::discrete_distribution<size_t> Distribution;
 };
 
 }  // namespace fuzzer
diff --git a/FuzzerOptions.h b/FuzzerOptions.h
index ce39c08..82855ce 100644
--- a/FuzzerOptions.h
+++ b/FuzzerOptions.h
@@ -53,6 +53,7 @@
   int PrintNewCovFuncs = 0;
   bool PrintFinalStats = false;
   bool PrintMutationStats = false;
+  bool UseWeightedMutations = false;
   bool PrintCorpusStats = false;
   bool PrintCoverage = false;
   bool PrintUnstableStats = false;