[libFuzzer] DFT: when dumping coverage, also dump the total number of instrumented blocks in a function; update merge_data_flow.py to merge coverage

llvm-svn: 360272
GitOrigin-RevId: e13eff293db2fa12de11e8087ef62950d0cd8f83
diff --git a/FuzzerDataFlowTrace.cpp b/FuzzerDataFlowTrace.cpp
index 5ae7510..604fe15 100644
--- a/FuzzerDataFlowTrace.cpp
+++ b/FuzzerDataFlowTrace.cpp
@@ -52,6 +52,8 @@
     // Printf("=== %s\n", Name.c_str());
     std::ifstream IF(SF.File);
     while (std::getline(IF, L, '\n')) {
+      if (!L.empty() && L[0] == 'C')
+        continue; // Ignore coverage.
       size_t SpacePos = L.find(' ');
       if (SpacePos == std::string::npos)
         return ParseError("no space in the trace line");
diff --git a/dataflow/DataFlow.cpp b/dataflow/DataFlow.cpp
index d51c3f0..989675e 100644
--- a/dataflow/DataFlow.cpp
+++ b/dataflow/DataFlow.cpp
@@ -44,14 +44,15 @@
 // ===============
 //  F0 11111111111111
 //  F1 10000000000000
-//  C0 1 2 3 4
-//  C1
+//  C0 1 2 3 4 5
+//  C1 8
 //  ===============
 // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
 //    The byte string is LEN+1 bytes. The last byte is set if the function
 //    depends on the input length.
-// "CN X Y Z": tells that a function N has basic blocks X, Y, and Z covered
-//    in addition to the function's entry block.
+// "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered
+//    in addition to the function's entry block; the last number, T, is the
+//    total number of instrumented blocks in the function (entry included).
 //
 //===----------------------------------------------------------------------===*/
 
@@ -87,6 +88,10 @@
   PCFLAG_FUNC_ENTRY = 1,
 };
 
+static inline bool BlockIsEntry(size_t BlockIdx) {
+  return PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY;
+}
+
 // Prints all instrumented functions.
 static int PrintFunctions() {
   // We don't have the symbolizer integrated with dfsan yet.
@@ -99,8 +104,7 @@
                      "| sed 's/dfs\\$//g'", "w");
   for (size_t I = 0; I < NumGuards; I++) {
     uintptr_t PC = PCsBeg[I * 2];
-    uintptr_t PCFlags = PCsBeg[I * 2 + 1];
-    if (!(PCFlags & PCFLAG_FUNC_ENTRY)) continue;
+    if (!BlockIsEntry(I)) continue;
     void *const Buf[1] = {(void*)PC};
     backtrace_symbols_fd(Buf, 1, fileno(Pipe));
   }
@@ -142,23 +146,22 @@
 static void PrintCoverage(FILE *Out) {
   ssize_t CurrentFuncGuard = -1;
   ssize_t CurrentFuncNum = -1;
-  int NumFuncsCovered = 0;
-  for (size_t I = 0; I < NumGuards; I++) {
-    bool IsEntry = PCsBeg[I * 2 + 1] & PCFLAG_FUNC_ENTRY;
-    if (IsEntry) {
-      CurrentFuncNum++;
-      CurrentFuncGuard = I;
+  ssize_t NumBlocksInCurrentFunc = -1;
+  for (size_t FuncBeg = 0; FuncBeg < NumGuards;) {
+    CurrentFuncNum++;
+    assert(BlockIsEntry(FuncBeg));
+    size_t FuncEnd = FuncBeg + 1;
+    for (; FuncEnd < NumGuards && !BlockIsEntry(FuncEnd); FuncEnd++)
+      ;
+    if (BBExecuted[FuncBeg]) {
+      fprintf(Out, "C%zd", CurrentFuncNum);
+      for (size_t I = FuncBeg + 1; I < FuncEnd; I++)
+        if (BBExecuted[I])
+          fprintf(Out, " %zd", I - FuncBeg);
+      fprintf(Out, " %zd\n", FuncEnd - FuncBeg);
     }
-    if (!BBExecuted[I]) continue;
-    if (IsEntry) {
-      if (NumFuncsCovered) fprintf(Out, "\n");
-      fprintf(Out, "C%zd ", CurrentFuncNum);
-      NumFuncsCovered++;
-    } else {
-      fprintf(Out, "%zd ", I - CurrentFuncGuard);
-    }
+    FuncBeg = FuncEnd;
   }
-  fprintf(Out, "\n");
 }
 
 int main(int argc, char **argv) {
@@ -229,7 +232,7 @@
   PCsEnd = pcs_end;
   assert(NumGuards == (PCsEnd - PCsBeg) / 2);
   for (size_t i = 0; i < NumGuards; i++) {
-    if (PCsBeg[i * 2 + 1] & PCFLAG_FUNC_ENTRY) {
+    if (BlockIsEntry(i)) {
       NumFuncs++;
       GuardsBeg[i] = NumFuncs;
     }
diff --git a/scripts/merge_data_flow.py b/scripts/merge_data_flow.py
index d6000fa..9f69018 100755
--- a/scripts/merge_data_flow.py
+++ b/scripts/merge_data_flow.py
@@ -22,20 +22,37 @@
 
 def main(argv):
   D = {}
+  C = {}
+  # read the lines.
   for line in fileinput.input():
+    # collect the coverage.
     if line.startswith('C'):
+      COV = line.strip().split(' ')
+      F = COV[0];
+      if not F in C:
+        C[F] = {0}
+      for B in COV[1:]:
+        C[F].add(int(B))
       continue
+    # collect the data flow trace.
     [F,BV] = line.strip().split(' ')
     if F in D:
       D[F] = Merge(D[F], BV)
     else:
       D[F] = BV;
+  # print the combined data flow trace.
   for F in D.keys():
     if isinstance(D[F], str):
       value = D[F]
     else:
       value = D[F].decode('utf-8')
     print("%s %s" % (F, value))
+  # print the combined coverage: the union of the numbers seen per function.
+  for F in C.keys():
+    print("%s" % F, end="")
+    for B in list(C[F])[1:]:
+      print(" %s" % B, end="")
+    print()
 
 if __name__ == '__main__':
   main(sys.argv)