[libFuzzer] Add an option to keep initial seed inputs around.

This patch adds an option "keep_seed" to keep all initial seed inputs in the
corpus. Previously, only the initial seed inputs that find new coverage were
added to the corpus, and all the other initial inputs were discarded. We
observed in some circumstances that useful initial seed inputs are discarded as
they find no new coverage, even though they contain useful fragments in them
(e.g., SQLITE3 FuzzBench benchmark). This newly added option provides a way to
keeping seed inputs in the corpus for those circumstances. With this patch, and
with -keep_seed=1, all initial seed inputs are kept in the corpus regardless of
whether they find new coverage or not. Further, these seed inputs are not
replaced with smaller inputs even if -reduce_inputs=1.

Differential Revision: https://reviews.llvm.org/D86577

GitOrigin-RevId: 62673c430de43837b0f177089ed184a0ffcd5678
diff --git a/FuzzerCorpus.h b/FuzzerCorpus.h
index 54d1e09..fd8ff6c 100644
--- a/FuzzerCorpus.h
+++ b/FuzzerCorpus.h
@@ -33,6 +33,7 @@
   // Stats.
   size_t NumExecutedMutations = 0;
   size_t NumSuccessfullMutations = 0;
+  bool NeverReduce = false;
   bool MayDeleteFile = false;
   bool Reduced = false;
   bool HasFocusFunction = false;
@@ -177,7 +178,7 @@
   bool empty() const { return Inputs.empty(); }
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
-                         bool HasFocusFunction,
+                         bool HasFocusFunction, bool NeverReduce,
                          const Vector<uint32_t> &FeatureSet,
                          const DataFlowTrace &DFT, const InputInfo *BaseII) {
     assert(!U.empty());
@@ -187,6 +188,7 @@
     InputInfo &II = *Inputs.back();
     II.U = U;
     II.NumFeatures = NumFeatures;
+    II.NeverReduce = NeverReduce;
     II.MayDeleteFile = MayDeleteFile;
     II.UniqFeatureSet = FeatureSet;
     II.HasFocusFunction = HasFocusFunction;
diff --git a/FuzzerDriver.cpp b/FuzzerDriver.cpp
index 2615014..a13fb03 100644
--- a/FuzzerDriver.cpp
+++ b/FuzzerDriver.cpp
@@ -671,6 +671,7 @@
   Options.Verbosity = Flags.verbosity;
   Options.MaxLen = Flags.max_len;
   Options.LenControl = Flags.len_control;
+  Options.KeepSeed = Flags.keep_seed;
   Options.UnitTimeoutSec = Flags.timeout;
   Options.ErrorExitCode = Flags.error_exitcode;
   Options.TimeoutExitCode = Flags.timeout_exitcode;
diff --git a/FuzzerFlags.def b/FuzzerFlags.def
index 8114791..1dc805d 100644
--- a/FuzzerFlags.def
+++ b/FuzzerFlags.def
@@ -23,6 +23,10 @@
 FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files "
   "to use as an additional seed corpus. Alternatively, an \"@\" followed by "
   "the name of a file containing the comma-separated list.")
+FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if "
+  "they do not produce new coverage. When used with |reduce_inputs==1|, the "
+  "seed inputs will never be reduced. This option can be useful when seeds are"
+  "not properly formed for the fuzz target but still have useful snippets.")
 FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
 FUZZER_FLAG_INT(mutate_depth, 5,
             "Apply this number of consecutive mutations to each input.")
diff --git a/FuzzerFork.cpp b/FuzzerFork.cpp
index d9e6b79..84725d2 100644
--- a/FuzzerFork.cpp
+++ b/FuzzerFork.cpp
@@ -309,11 +309,15 @@
   else
     Env.MainCorpusDir = CorpusDirs[0];
 
-  auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
-  CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
-                      {}, &Env.Cov,
-                      CFPath, false);
-  RemoveFile(CFPath);
+  if (Options.KeepSeed) {
+    for (auto &File : SeedFiles)
+      Env.Files.push_back(File.File);
+  } else {
+    auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
+    CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
+                        {}, &Env.Cov, CFPath, false);
+    RemoveFile(CFPath);
+  }
   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
          Env.Files.size(), Env.TempDir.c_str());
 
diff --git a/FuzzerInternal.h b/FuzzerInternal.h
index 31096ce..2b172d9 100644
--- a/FuzzerInternal.h
+++ b/FuzzerInternal.h
@@ -67,7 +67,8 @@
 
   void ExecuteCallback(const uint8_t *Data, size_t Size);
   bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false,
-              InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr);
+              InputInfo *II = nullptr, bool ForceAddToCorpus = false,
+              bool *FoundUniqFeatures = nullptr);
 
   // Merge Corpora[1:] into Corpora[0].
   void Merge(const Vector<std::string> &Corpora);
diff --git a/FuzzerLoop.cpp b/FuzzerLoop.cpp
index 02db6d2..d76075b 100644
--- a/FuzzerLoop.cpp
+++ b/FuzzerLoop.cpp
@@ -464,7 +464,8 @@
 }
 
 bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
-                    InputInfo *II, bool *FoundUniqFeatures) {
+                    InputInfo *II, bool ForceAddToCorpus,
+                    bool *FoundUniqFeatures) {
   if (!Size)
     return false;
 
@@ -478,7 +479,7 @@
       UniqFeatureSetTmp.push_back(Feature);
     if (Options.Entropic)
       Corpus.UpdateFeatureFrequency(II, Feature);
-    if (Options.ReduceInputs && II)
+    if (Options.ReduceInputs && II && !II->NeverReduce)
       if (std::binary_search(II->UniqFeatureSet.begin(),
                              II->UniqFeatureSet.end(), Feature))
         FoundUniqFeaturesOfII++;
@@ -487,11 +488,12 @@
     *FoundUniqFeatures = FoundUniqFeaturesOfII;
   PrintPulseAndReportSlowInput(Data, Size);
   size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
-  if (NumNewFeatures) {
+  if (NumNewFeatures || ForceAddToCorpus) {
     TPC.UpdateObservedPCs();
-    auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures,
-                                    MayDeleteFile, TPC.ObservedFocusFunction(),
-                                    UniqFeatureSetTmp, DFT, II);
+    auto NewII =
+        Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
+                           TPC.ObservedFocusFunction(), ForceAddToCorpus,
+                           UniqFeatureSetTmp, DFT, II);
     WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
                           NewII->UniqFeatureSet);
     return true;
@@ -700,7 +702,7 @@
 
     bool FoundUniqFeatures = false;
     bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II,
-                         &FoundUniqFeatures);
+                         /*ForceAddToCorpus*/ false, &FoundUniqFeatures);
     TryDetectingAMemoryLeak(CurrentUnitData, Size,
                             /*DuringInitialCorpusExecution*/ false);
     if (NewCov) {
@@ -768,7 +770,9 @@
     for (auto &SF : CorporaFiles) {
       auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false);
       assert(U.size() <= MaxInputLen);
-      RunOne(U.data(), U.size());
+      RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr,
+             /*ForceAddToCorpus*/ Options.KeepSeed,
+             /*FoundUniqFeatures*/ nullptr);
       CheckExitOnSrcPosOrItem();
       TryDetectingAMemoryLeak(U.data(), U.size(),
                               /*DuringInitialCorpusExecution*/ true);
diff --git a/FuzzerOptions.h b/FuzzerOptions.h
index b75e7c7..2696197 100644
--- a/FuzzerOptions.h
+++ b/FuzzerOptions.h
@@ -18,6 +18,7 @@
   int Verbosity = 1;
   size_t MaxLen = 0;
   size_t LenControl = 1000;
+  bool KeepSeed = false;
   int UnitTimeoutSec = 300;
   int TimeoutExitCode = 70;
   int OOMExitCode = 71;
diff --git a/tests/FuzzerUnittest.cpp b/tests/FuzzerUnittest.cpp
index 0e9435a..93b54f5 100644
--- a/tests/FuzzerUnittest.cpp
+++ b/tests/FuzzerUnittest.cpp
@@ -597,8 +597,10 @@
   size_t N = 10;
   size_t TriesPerUnit = 1<<16;
   for (size_t i = 0; i < N; i++)
-    C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, 1, false, false, {}, DFT,
-                   nullptr);
+    C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
+                   /*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
+                   /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT,
+                   /*BaseII*/ nullptr);
 
   Vector<size_t> Hist(N);
   for (size_t i = 0; i < N * TriesPerUnit; i++) {