[libFuzzer] remove the data-flow-trace (DFT) python scripts; their functionality is now part of libFuzzer proper; also, write functions.txt to disk only if the file doesn't already exist

llvm-svn: 361452
GitOrigin-RevId: eac9a7830b6b8442762a89f1439f14a0311b6648
diff --git a/FuzzerDataFlowTrace.cpp b/FuzzerDataFlowTrace.cpp
index 74bfa8f..c67238e 100644
--- a/FuzzerDataFlowTrace.cpp
+++ b/FuzzerDataFlowTrace.cpp
@@ -310,11 +310,14 @@
       OF << C << std::endl;
   }
   RemoveFile(Temp);
-  // Write functions.txt.
-  Command Cmd;
-  Cmd.addArgument(DFTBinary);
-  Cmd.setOutputFile(DirPlusFile(DirPath, "functions.txt"));
-  ExecuteCommand(Cmd);
+  // Write functions.txt if it's currently empty or doesn't exist.
+  auto FunctionsTxtPath = DirPlusFile(DirPath, "functions.txt");
+  if (FileToString(FunctionsTxtPath).empty()) {
+    Command Cmd;
+    Cmd.addArgument(DFTBinary);
+    Cmd.setOutputFile(FunctionsTxtPath);
+    ExecuteCommand(Cmd);
+  }
   return 0;
 }
 
diff --git a/scripts/collect_data_flow.py b/scripts/collect_data_flow.py
deleted file mode 100755
index bd601eb..0000000
--- a/scripts/collect_data_flow.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-#===------------------------------------------------------------------------===#
-# Runs the data-flow tracer several times on the same input in order to collect
-# the complete trace for all input bytes (running it on all bytes at once
-# may fail if DFSan runs out of labels).
-# Usage:
-#
-#   # Collect dataflow for one input, store it in OUTPUT (default is stdout)
-#   collect_data_flow.py BINARY INPUT [OUTPUT]
-#
-#   # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
-#   collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
-#===------------------------------------------------------------------------===#
-import atexit
-import hashlib
-import sys
-import os
-import subprocess
-import tempfile
-import shutil
-
-tmpdir = ""
-
-def cleanup(d):
-  print("removing: %s" % d)
-  shutil.rmtree(d)
-
-def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
-  print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
-                                                               output_dir))
-  assert not os.path.exists(output_dir)
-  os.mkdir(output_dir)
-  for root, dirs, files in os.walk(corpus_dir):
-    for f in files:
-      path = os.path.join(root, f)
-      with open(path, 'rb') as fh:
-        data = fh.read()
-      sha1 = hashlib.sha1(data).hexdigest()
-      output = os.path.join(output_dir, sha1)
-      subprocess.call([self, exe, path, output])
-  functions_txt = open(os.path.join(output_dir, "functions.txt"), "w")
-  subprocess.call([exe], stdout=functions_txt)
-
-
-def main(argv):
-  exe = argv[1]
-  inp = argv[2]
-  if os.path.isdir(inp):
-    return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])
-  size = os.path.getsize(inp)
-  q = [[0, size]]
-  tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
-  atexit.register(cleanup, tmpdir)
-  print("tmpdir: ", tmpdir)
-  outputs = []
-  while len(q):
-    r = q.pop()
-    print("******* Trying:  ", r)
-    tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
-    ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
-    if ret and r[1] - r[0] >= 2:
-      q.append([r[0], (r[1] + r[0]) // 2])
-      q.append([(r[1] + r[0]) // 2, r[1]])
-    else:
-      outputs.append(tmpfile)
-      print("******* Success: ", r)
-  f = sys.stdout
-  if len(argv) >= 4:
-    f = open(argv[3], "w")
-  merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
-  subprocess.call([merge] + outputs, stdout=f)
-
-if __name__ == '__main__':
-  main(sys.argv)
diff --git a/scripts/merge_data_flow.py b/scripts/merge_data_flow.py
deleted file mode 100755
index 9f69018..0000000
--- a/scripts/merge_data_flow.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env python3
-#===- lib/fuzzer/scripts/merge_data_flow.py ------------------------------===#
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-#===------------------------------------------------------------------------===#
-# Merge several data flow traces into one.
-# Usage:
-#   merge_data_flow.py trace1 trace2 ...  > result
-#===------------------------------------------------------------------------===#
-import sys
-import fileinput
-from array import array
-
-def Merge(a, b):
-  res = array('b')
-  for i in range(0, len(a)):
-    res.append(ord('1' if a[i] == '1' or b[i] == '1' else '0'))
-  return res.tostring().decode('utf-8')
-
-def main(argv):
-  D = {}
-  C = {}
-  # read the lines.
-  for line in fileinput.input():
-    # collect the coverage.
-    if line.startswith('C'):
-      COV = line.strip().split(' ')
-      F = COV[0];
-      if not F in C:
-        C[F] = {0}
-      for B in COV[1:]:
-        C[F].add(int(B))
-      continue
-    # collect the data flow trace.
-    [F,BV] = line.strip().split(' ')
-    if F in D:
-      D[F] = Merge(D[F], BV)
-    else:
-      D[F] = BV;
-  # print the combined data flow trace.
-  for F in D.keys():
-    if isinstance(D[F], str):
-      value = D[F]
-    else:
-      value = D[F].decode('utf-8')
-    print("%s %s" % (F, value))
-  # print the combined coverage
-  for F in C.keys():
-    print("%s" % F, end="")
-    for B in list(C[F])[1:]:
-      print(" %s" % B, end="")
-    print()
-
-if __name__ == '__main__':
-  main(sys.argv)