[libFuzzer] Fix DataFlow.cpp logic when tracing long inputs. Summary: 1. Do not create DFSan labels for the bytes which we do not trace. This is where we run out of labels in the first place. 2. When dumping the traces to disk, make sure to offset the label identifiers by the number of the first byte in the trace range. 3. For the last label, make sure to write it at the last position of the trace bit string, as that label represents the input size, not any particular byte. Also fixed the bug with division in Python which I introduced when I migrated the scripts to Python3 (`//` is required for integer division). Otherwise, the scripts waste too much time unsuccessfully trying to collect and process traces from long inputs. For more context, see https://github.com/google/oss-fuzz/issues/1632#issuecomment-481761789 Reviewers: kcc Reviewed By: kcc Subscribers: delcypher, #sanitizers, llvm-commits Tags: #llvm, #sanitizers Differential Revision: https://reviews.llvm.org/D60538 llvm-svn: 358311 GitOrigin-RevId: b6e6d3c740a4b94a64ad62745a18571f1a9cb3cb

commit: 3b3f49297c5a03367a9d9a534dfbdf0d587e1304 [log] [tgz]
author: Max Moroz <mmoroz@chromium.org> Fri Apr 12 21:00:12 2019 +0000
committer: Copybara-Service <copybara-worker@google.com> Thu Mar 25 23:19:26 2021 -0700
tree: 231c781bb5d9d01f8b17ca5b532cc11e38aeed38
parent: e84207334a404aa4434e73472cda07504b51c566 [diff]
diff --git a/dataflow/DataFlow.cpp b/dataflow/DataFlow.cpp
index 742e956..187a8e5 100644
--- a/dataflow/DataFlow.cpp
+++ b/dataflow/DataFlow.cpp

@@ -63,6 +63,9 @@
 } // extern "C"
 
 static size_t InputLen;
+static size_t InputLabelBeg;
+static size_t InputLabelEnd;
+static size_t InputSizeLabel;
 static size_t NumFuncs;
 static const uintptr_t *FuncsBeg;
 static __thread size_t CurrentFunc;
@@ -95,8 +98,10 @@
     return;
   LabelSeen[L] = true;
   assert(L);
-  if (L <= InputLen + 1) {
-    Bytes[L - 1] = '1';
+  if (L < InputSizeLabel) {
+    Bytes[L + InputLabelBeg - 1] = '1';
+  } else if (L == InputSizeLabel) {
+    Bytes[InputLen] = '1';
   } else {
     auto *DLI = dfsan_get_label_info(L);
     SetBytesForLabel(DLI->l1, Bytes);
@@ -124,9 +129,9 @@
   if (argc == 1)
     return PrintFunctions();
   assert(argc == 4 || argc == 5);
-  size_t Beg = atoi(argv[1]);
-  size_t End = atoi(argv[2]);
-  assert(Beg < End);
+  InputLabelBeg = atoi(argv[1]);
+  InputLabelEnd = atoi(argv[2]);
+  assert(InputLabelBeg < InputLabelEnd);
 
   const char *Input = argv[3];
   fprintf(stderr, "INFO: reading '%s'\n", Input);
@@ -143,14 +148,16 @@
 
   fprintf(stderr, "INFO: running '%s'\n", Input);
   for (size_t I = 1; I <= InputLen; I++) {
-    dfsan_label L = dfsan_create_label("", nullptr);
-    assert(L == I);
     size_t Idx = I - 1;
-    if (Idx >= Beg && Idx < End)
+    if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
+      dfsan_label L = dfsan_create_label("", nullptr);
+      assert(L == I - InputLabelBeg);
       dfsan_set_label(L, Buf + Idx, 1);
+    }
   }
   dfsan_label SizeL = dfsan_create_label("", nullptr);
-  assert(SizeL == InputLen + 1);
+  InputSizeLabel = SizeL;
+  assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
   dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
 
   LLVMFuzzerTestOneInput(Buf, InputLen);

diff --git a/scripts/collect_data_flow.py b/scripts/collect_data_flow.py
index e8b56a7..bd601eb 100755
--- a/scripts/collect_data_flow.py
+++ b/scripts/collect_data_flow.py

@@ -65,8 +65,8 @@
     tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
     ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
     if ret and r[1] - r[0] >= 2:
-      q.append([r[0], (r[1] + r[0]) / 2])
-      q.append([(r[1] + r[0]) / 2, r[1]])
+      q.append([r[0], (r[1] + r[0]) // 2])
+      q.append([(r[1] + r[0]) // 2, r[1]])
     else:
       outputs.append(tmpfile)
       print("******* Success: ", r)
commit	3b3f49297c5a03367a9d9a534dfbdf0d587e1304	[log] [tgz]
author	Max Moroz <mmoroz@chromium.org>	Fri Apr 12 21:00:12 2019 +0000
committer	Copybara-Service <copybara-worker@google.com>	Thu Mar 25 23:19:26 2021 -0700
tree	231c781bb5d9d01f8b17ca5b532cc11e38aeed38
parent	e84207334a404aa4434e73472cda07504b51c566 [diff]