#!/usr/bin/env python3
#===- lib/fuzzer/scripts/collect_data_flow.py ------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
# Runs the data-flow tracer several times on the same input in order to collect
# the complete trace for all input bytes (running it on all bytes at once
# may fail if DFSan runs out of labels).
# Usage:
#
# # Collect dataflow for one input, store it in OUTPUT (default is stdout)
# collect_data_flow.py BINARY INPUT [OUTPUT]
#
# # Collect dataflow for all inputs in CORPUS_DIR, store them in OUTPUT_DIR
# collect_data_flow.py BINARY CORPUS_DIR OUTPUT_DIR
#===------------------------------------------------------------------------===#
import atexit
import hashlib
import sys
import os
import subprocess
import tempfile
import shutil
# Module-level placeholder. No function visible in this file reads or rebinds
# it: main() keeps the temporary directory path in a local variable instead.
tmpdir = ""
def cleanup(d):
    """Report and recursively delete the directory tree rooted at *d*.

    Args:
        d: Path of the directory to remove.

    Raises:
        OSError: propagated from shutil.rmtree (e.g. *d* does not exist).
    """
    message = "removing: %s" % d
    print(message)
    shutil.rmtree(d)
def collect_dataflow_for_corpus(self, exe, corpus_dir, output_dir):
    """Collect a data-flow trace for every file found under *corpus_dir*.

    Each corpus file is processed by launching ``self`` as a child process
    with the arguments ``exe``, the file path and an output path. The output
    path is *output_dir* joined with the SHA-1 hex digest of the file's
    contents. Afterwards *exe* is run with no arguments and whatever it
    prints on stdout is saved as ``functions.txt`` inside *output_dir*.

    Exit codes of the child processes are not checked.

    Args:
        self: Path of the program to launch per input (main() passes
            argv[0], i.e. this script). This is not an object instance.
        exe: Path of the binary handed to ``self`` and run for functions.txt.
        corpus_dir: Directory walked recursively for input files.
        output_dir: Directory to create for the results; must not exist yet.

    Raises:
        AssertionError: if *output_dir* already exists.
    """
    print("Collecting dataflow for corpus: %s output_dir: %s" % (corpus_dir,
                                                               output_dir))
    assert not os.path.exists(output_dir), \
        "output directory already exists: %s" % output_dir
    os.mkdir(output_dir)
    for root, _dirs, files in os.walk(corpus_dir):
        for name in files:
            path = os.path.join(root, name)
            with open(path, 'rb') as fh:
                sha1 = hashlib.sha1(fh.read()).hexdigest()
            output = os.path.join(output_dir, sha1)
            subprocess.call([self, exe, path, output])
    # Close the handle deterministically once the child has finished writing;
    # the previous code left the file object open until garbage collection.
    with open(os.path.join(output_dir, "functions.txt"), "w") as functions_txt:
        subprocess.call([exe], stdout=functions_txt)
def main(argv):
    """Collect the data-flow trace for one input file or a whole corpus.

    If argv[2] is a directory, the work is delegated to
    collect_dataflow_for_corpus() with argv[3] as the output directory.

    Otherwise the binary argv[1] is run on the input with a pair of numbers
    taken from a work list that starts as ``[0, size-of-input]``. When a run
    exits non-zero and the pair spans at least 2, the pair is split at its
    midpoint and both halves are retried (the file header notes that tracing
    all bytes at once may fail if DFSan runs out of labels). The per-range
    output files are then passed to ``merge_data_flow.py``, looked up next to
    argv[0], whose stdout goes to argv[3] if given, else to this process's
    stdout.

    Args:
        argv: Command line: [script, BINARY, INPUT_or_CORPUS_DIR, [OUTPUT]].

    Returns:
        None (in corpus mode, the result of collect_dataflow_for_corpus()).
    """
    exe = argv[1]
    inp = argv[2]
    if os.path.isdir(inp):
        return collect_dataflow_for_corpus(argv[0], exe, inp, argv[3])

    size = os.path.getsize(inp)
    q = [[0, size]]
    tmpdir = tempfile.mkdtemp(prefix="libfuzzer-tmp-")
    # Remove the scratch directory when the interpreter exits.
    atexit.register(cleanup, tmpdir)
    print("tmpdir: ", tmpdir)

    outputs = []
    while q:
        r = q.pop()
        begin, end = r
        print("******* Trying: ", r)
        tmpfile = os.path.join(tmpdir, "%d-%d" % (begin, end))
        ret = subprocess.call([exe, str(begin), str(end), inp, tmpfile])
        if ret and end - begin >= 2:
            # The run failed and the range can still be halved: retry both
            # halves separately.
            mid = (begin + end) // 2
            q.append([begin, mid])
            q.append([mid, end])
        else:
            # NOTE(review): a failing run on a range that can no longer be
            # split also lands here and is reported as success.
            outputs.append(tmpfile)
            print("******* Success: ", r)

    merge = os.path.join(os.path.dirname(argv[0]), "merge_data_flow.py")
    merge_cmd = [merge] + outputs
    if len(argv) >= 4:
        # Close the output file as soon as the merge finishes instead of
        # leaving the handle open for the rest of the process.
        with open(argv[3], "w") as f:
            subprocess.call(merge_cmd, stdout=f)
    else:
        # The child writes straight to the inherited descriptor, so push out
        # our own buffered progress messages first to keep them in order.
        sys.stdout.flush()
        subprocess.call(merge_cmd, stdout=sys.stdout)
# Script entry point: hand the full command line (including argv[0]) to main().
if __name__ == "__main__":
    main(sys.argv)