[libFuzzer] Fix DataFlow.cpp logic when tracing long inputs.

Summary:
1. Do not create DFSan labels for the bytes which we do not trace. This is where we run out of labels at the first place.
2. When dumping the traces on the disk, make sure to offset the label identifiers by the number of the first byte in the trace range.
3. For the last label, make sure to write it at the last position of the trace bit string, as that label represents the input size, not any particular byte.

Also fixed the bug with division in python which I've introduced when migrated the scripts to Python3 (`//` is required for integral division).

Otherwise, the scripts are wasting too much time unsuccessfully trying to
collect and process traces from the long inputs. For more context, see
https://github.com/google/oss-fuzz/issues/1632#issuecomment-481761789

Reviewers: kcc

Reviewed By: kcc

Subscribers: delcypher, #sanitizers, llvm-commits

Tags: #llvm, #sanitizers

Differential Revision: https://reviews.llvm.org/D60538

llvm-svn: 358311
GitOrigin-RevId: b6e6d3c740a4b94a64ad62745a18571f1a9cb3cb
diff --git a/dataflow/DataFlow.cpp b/dataflow/DataFlow.cpp
index 742e956..187a8e5 100644
--- a/dataflow/DataFlow.cpp
+++ b/dataflow/DataFlow.cpp
@@ -63,6 +63,9 @@
 } // extern "C"
 
 static size_t InputLen;
+static size_t InputLabelBeg;
+static size_t InputLabelEnd;
+static size_t InputSizeLabel;
 static size_t NumFuncs;
 static const uintptr_t *FuncsBeg;
 static __thread size_t CurrentFunc;
@@ -95,8 +98,10 @@
     return;
   LabelSeen[L] = true;
   assert(L);
-  if (L <= InputLen + 1) {
-    Bytes[L - 1] = '1';
+  if (L < InputSizeLabel) {
+    Bytes[L + InputLabelBeg - 1] = '1';
+  } else if (L == InputSizeLabel) {
+    Bytes[InputLen] = '1';
   } else {
     auto *DLI = dfsan_get_label_info(L);
     SetBytesForLabel(DLI->l1, Bytes);
@@ -124,9 +129,9 @@
   if (argc == 1)
     return PrintFunctions();
   assert(argc == 4 || argc == 5);
-  size_t Beg = atoi(argv[1]);
-  size_t End = atoi(argv[2]);
-  assert(Beg < End);
+  InputLabelBeg = atoi(argv[1]);
+  InputLabelEnd = atoi(argv[2]);
+  assert(InputLabelBeg < InputLabelEnd);
 
   const char *Input = argv[3];
   fprintf(stderr, "INFO: reading '%s'\n", Input);
@@ -143,14 +148,16 @@
 
   fprintf(stderr, "INFO: running '%s'\n", Input);
   for (size_t I = 1; I <= InputLen; I++) {
-    dfsan_label L = dfsan_create_label("", nullptr);
-    assert(L == I);
     size_t Idx = I - 1;
-    if (Idx >= Beg && Idx < End)
+    if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
+      dfsan_label L = dfsan_create_label("", nullptr);
+      assert(L == I - InputLabelBeg);
       dfsan_set_label(L, Buf + Idx, 1);
+    }
   }
   dfsan_label SizeL = dfsan_create_label("", nullptr);
-  assert(SizeL == InputLen + 1);
+  InputSizeLabel = SizeL;
+  assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
   dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
 
   LLVMFuzzerTestOneInput(Buf, InputLen);
diff --git a/scripts/collect_data_flow.py b/scripts/collect_data_flow.py
index e8b56a7..bd601eb 100755
--- a/scripts/collect_data_flow.py
+++ b/scripts/collect_data_flow.py
@@ -65,8 +65,8 @@
     tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
     ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
     if ret and r[1] - r[0] >= 2:
-      q.append([r[0], (r[1] + r[0]) / 2])
-      q.append([(r[1] + r[0]) / 2, r[1]])
+      q.append([r[0], (r[1] + r[0]) // 2])
+      q.append([(r[1] + r[0]) // 2, r[1]])
     else:
       outputs.append(tmpfile)
       print("******* Success: ", r)