[libFuzzer] make the corpus elements aware of their data flow traces

llvm-svn: 334158
GitOrigin-RevId: 67af99235fa24c031a0efcd074b401c66b843f0c
diff --git a/FuzzerCorpus.h b/FuzzerCorpus.h
index b4ba64b..c1603e1 100644
--- a/FuzzerCorpus.h
+++ b/FuzzerCorpus.h
@@ -12,6 +12,7 @@
 #ifndef LLVM_FUZZER_CORPUS
 #define LLVM_FUZZER_CORPUS
 
+#include "FuzzerDataFlowTrace.h"
 #include "FuzzerDefs.h"
 #include "FuzzerIO.h"
 #include "FuzzerRandom.h"
@@ -37,6 +38,7 @@
   bool Reduced = false;
   bool HasFocusFunction = false;
   Vector<uint32_t> UniqFeatureSet;
+  Vector<bool> DataFlowTraceForFocusFunction;
 };
 
 class InputCorpus {
@@ -76,10 +78,17 @@
     });
   }
 
+  size_t NumInputsWithDataFlowTrace() {  // Inputs with a non-empty trace.
+    return std::count_if(Inputs.begin(), Inputs.end(), [](const InputInfo *II) {
+      return !II->DataFlowTraceForFocusFunction.empty();
+    });
+  }
+
   bool empty() const { return Inputs.empty(); }
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
-                   bool HasFocusFunction, const Vector<uint32_t> &FeatureSet) {
+                   bool HasFocusFunction, const Vector<uint32_t> &FeatureSet,
+                   const DataFlowTrace &DFT) {
     assert(!U.empty());
     if (FeatureDebug)
       Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
@@ -92,7 +101,11 @@
     II.HasFocusFunction = HasFocusFunction;
     std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end());
     ComputeSHA1(U.data(), U.size(), II.Sha1);
-    Hashes.insert(Sha1ToString(II.Sha1));
+    auto Sha1Str = Sha1ToString(II.Sha1);
+    Hashes.insert(Sha1Str);
+    if (HasFocusFunction)
+      if (auto V = DFT.Get(Sha1Str))
+        II.DataFlowTraceForFocusFunction = *V;
     UpdateCorpusDistribution();
     PrintCorpus();
     // ValidateFeatureSet();
diff --git a/FuzzerDataFlowTrace.cpp b/FuzzerDataFlowTrace.cpp
index 69efd6f..114034c 100644
--- a/FuzzerDataFlowTrace.cpp
+++ b/FuzzerDataFlowTrace.cpp
@@ -73,6 +73,7 @@
             ParseError("the trace should contain only 0 or 1");
           V[I] = Beg[I] == '1';
         }
+        Traces[Name] = V;
         // Print just a few small traces.
         if (NumTracesWithFocusFunction <= 3 && Len <= 16)
           Printf("%s => |%s|\n", Name.c_str(), L.c_str() + SpacePos + 1);
diff --git a/FuzzerDataFlowTrace.h b/FuzzerDataFlowTrace.h
index 2b7b71f..1511430 100644
--- a/FuzzerDataFlowTrace.h
+++ b/FuzzerDataFlowTrace.h
@@ -31,9 +31,25 @@
 
 #include "FuzzerDefs.h"
 
+#include <unordered_map>
+#include <vector>
+#include <string>
+
 namespace fuzzer {
-struct DataFlowTrace {
+class DataFlowTrace {  // Per-input traces, looked up by the input's sha1.
+ public:
   void Init(const std::string &DirPath, const std::string &FocusFunction);
+  void Clear() { Traces.clear(); }  // Drops every stored trace.
+  const Vector<bool> *Get(const std::string &InputSha1) const {
+    auto It = Traces.find(InputSha1);
+    if (It != Traces.end())
+      return &It->second;  // Points into Traces; dangles after Clear().
+    return nullptr;  // No trace stored under this sha1.
+  }
+
+ private:
+  // Input's sha1 => DFT for the FocusFunction.
+  std::unordered_map<std::string, Vector<bool> > Traces;
 };
 }  // namespace fuzzer
 
diff --git a/FuzzerLoop.cpp b/FuzzerLoop.cpp
index a195d21..346f90e 100644
--- a/FuzzerLoop.cpp
+++ b/FuzzerLoop.cpp
@@ -469,10 +469,11 @@
     TPC.UpdateObservedPCs();
     Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
                        TPC.ObservedFocusFunction(),
-                       UniqFeatureSetTmp);
+                       UniqFeatureSetTmp, DFT);
     return true;
   }
   if (II && FoundUniqFeaturesOfII &&
+      II->DataFlowTraceForFocusFunction.empty() &&
       FoundUniqFeaturesOfII == II->UniqFeatureSet.size() &&
       II->U.size() > Size) {
     Corpus.Replace(II, {Data, Data + Size});
@@ -739,6 +740,9 @@
   if (!Options.FocusFunction.empty())
     Printf("INFO: %zd/%zd inputs touch the focus function\n",
            Corpus.NumInputsThatTouchFocusFunction(), Corpus.size());
+  if (!Options.DataFlowTrace.empty())
+    Printf("INFO: %zd/%zd inputs have the Data Flow Trace\n",
+           Corpus.NumInputsWithDataFlowTrace(), Corpus.size());
 
   if (Corpus.empty() && Options.MaxNumberOfRuns) {
     Printf("ERROR: no interesting inputs were found. "
@@ -749,6 +753,7 @@
 
 void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {
   ReadAndExecuteSeedCorpora(CorpusDirs);
+  DFT.Clear();  // No need for DFT any more.
   TPC.SetPrintNewPCs(Options.PrintNewCovPcs);
   TPC.SetPrintNewFuncs(Options.PrintNewCovFuncs);
   system_clock::time_point LastCorpusReload = system_clock::now();
diff --git a/tests/FuzzerUnittest.cpp b/tests/FuzzerUnittest.cpp
index 0b86738..1b3a093 100644
--- a/tests/FuzzerUnittest.cpp
+++ b/tests/FuzzerUnittest.cpp
@@ -582,12 +582,13 @@
 }
 
 TEST(Corpus, Distribution) {
+  DataFlowTrace DFT;
   Random Rand(0);
   std::unique_ptr<InputCorpus> C(new InputCorpus(""));
   size_t N = 10;
   size_t TriesPerUnit = 1<<16;
   for (size_t i = 0; i < N; i++)
-    C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, false, {});
+    C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, false, {}, DFT);
 
   Vector<size_t> Hist(N);
   for (size_t i = 0; i < N * TriesPerUnit; i++) {