| #!/usr/bin/env python |
| # |
| # This tool is a debug location coverage calculator. |
| # It parses the output of llvm-dwarfdump --statistics and reports it |
| # in a more human-readable way. |
| # |
| |
| from __future__ import print_function |
| import argparse |
| import os |
| import sys |
| from json import loads |
| from math import ceil |
| from collections import OrderedDict |
| from subprocess import Popen, PIPE |
| |
| # Sentinel stored in place of a numeric statistic whose JSON value is "overflowed". |
| TAINT_VALUE = "tainted" |
| |
| # Initialize the plot. |
def init_plot(plt):
    """Apply the title, axis labels and tick styling shared by every plot.

    plt -- the pyplot-like module/object the figure is being drawn with.
    """
    plt.title("Debug Location Statistics", fontweight="bold")

    # Axis captions.
    x_caption = "location buckets"
    y_caption = "number of variables in the location buckets"
    plt.xlabel(x_caption)
    plt.ylabel(y_caption)

    # Bucket names are long, so the x tick labels are rotated and shrunk.
    x_tick_style = {"rotation": 45, "fontsize": "x-small"}
    plt.xticks(**x_tick_style)
    plt.yticks()
| |
| |
| # Finalize the plot. |
def finish_plot(plt):
    """Add the legend and horizontal grid, save the figure, and report it.

    plt -- the pyplot-like module/object the figure was drawn with.
    """
    plt.legend()

    # Thin grey horizontal lines at the major y ticks only.
    grid_style = {
        "color": "grey",
        "which": "major",
        "axis": "y",
        "linestyle": "-",
        "linewidth": 0.3,
    }
    plt.grid(**grid_style)

    plt.savefig("locstats.png")
    print('The plot was saved within "locstats.png".')
| |
| |
| # Holds the debug location statistics. |
class LocationStats:
    """Debug location statistics collected for a single file.

    Any numeric statistic may hold TAINT_VALUE instead of a number; every
    percentage derived from such a value is reported as TAINT_VALUE as well.
    """

    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        """Store the statistics.

        file_name                     -- name used in the plot labels.
        variables_total               -- total number of variables, or None
                                         when it was not collected.
        variables_total_locstats      -- number of variables the bucket
                                         counts are relative to.
        variables_with_loc            -- number of variables that have a
                                         location, or None when it was not
                                         collected.
        variables_scope_bytes_covered -- scope bytes covered by a location.
        variables_scope_bytes         -- total scope bytes.
        variables_coverage_map        -- ordered mapping from bucket name
                                         (see coverage_buckets()) to the
                                         number of variables in the bucket.
        """
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    @staticmethod
    def _percentage(part, whole):
        """Return `part` as an integer percentage of `whole`.

        Returns TAINT_VALUE when either operand is tainted, and 0 when
        `whole` is zero (instead of raising ZeroDivisionError).
        """
        if part == TAINT_VALUE or whole == TAINT_VALUE:
            return TAINT_VALUE
        if whole == 0:
            return 0
        # NOTE(review): ceil() only wraps the scaled numerator, so for
        # integer counts the result is effectively rounded down by int().
        # Kept as-is so the printed numbers do not change.
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage.
    def get_pc_coverage(self):
        """Return the percentage of scope bytes covered (or TAINT_VALUE)."""
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    def pretty_print(self):
        """Print the bucket table and the summary lines to standard output.

        Returns -1 (after printing a notice) when there are no scope bytes
        at all, and 0 otherwise.
        """
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        pc_ranges_covered = self.get_pc_coverage()

        print(" =================================================")
        print(" Debug Location Statistics ")
        print(" =================================================")
        print(" cov% samples percentage(~) ")
        print(" -------------------------------------------------")
        for cov_bucket in coverage_buckets():
            samples = self.variables_coverage_map[cov_bucket]
            percentage = self._percentage(samples, self.variables_total_locstats)
            # A tainted row contains at least one string, which cannot be
            # formatted with the integer ("d") presentation type.
            if percentage == TAINT_VALUE:
                row_format = " {0:10} {1:8} {2:3}%"
            else:
                row_format = " {0:10} {1:8d} {2:3d}%"
            print(row_format.format(cov_bucket, samples, percentage))
        print(" =================================================")
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability. variables_with_loc is compared against None (not
        # tested for truthiness) so that a legitimate count of 0 is still
        # reported as 0%.
        if self.variables_total and self.variables_with_loc is not None:
            total_availability = self._percentage(
                self.variables_with_loc, self.variables_total
            )
            print(" -------------------------------------------------")
            print(" -total availability: " + str(total_availability) + "%")
        print(" =================================================")

        return 0

    # Draw a plot representing the location buckets.
    def draw_plot(self):
        """Draw a bar chart of the location buckets and save it."""
        from matplotlib import pyplot as plt

        # Materialize the dict views so matplotlib receives plain sequences.
        bucket_names = list(self.variables_coverage_map.keys())
        bucket_counts = list(self.variables_coverage_map.values())
        buckets = range(len(bucket_names))

        plt.figure(figsize=(12, 8))
        init_plot(plt)
        plt.bar(
            buckets,
            bucket_counts,
            align="center",
            tick_label=bucket_names,
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    def draw_location_diff(self, locstats_to_compare):
        """Draw both objects' buckets side by side and save the plot.

        locstats_to_compare -- the LocationStats object drawn next to self.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        # Materialize the dict views so matplotlib receives plain sequences.
        counts = list(self.variables_coverage_map.values())
        counts_to_compare = list(locstats_to_compare.variables_coverage_map.values())
        buckets = range(len(counts))
        buckets_to_compare = range(len(counts_to_compare))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # Both series are aligned to the same tick edge; the opposite width
        # signs put the two bars on either side of each tick.
        ax.bar(
            buckets,
            counts,
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            buckets_to_compare,
            counts_to_compare,
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        # One text box per file with its PC range coverage.
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.88,
            "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )
        plt.text(
            0.02,
            0.83,
            "{} PC ranges covered: {}%".format(
                locstats_to_compare.file_name, pc_ranges_covered_to_compare
            ),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)
| |
| |
| # Define the location buckets. |
def coverage_buckets():
    """Yield the names of the location coverage buckets, lowest first.

    The sequence is "0%", "(0%,10%)", nine half-open ten-point ranges from
    "[10%,20%)" up to "[90%,100%)", and finally "100%".
    """
    yield "0%"
    yield "(0%,10%)"
    lower_bound = 10
    while lower_bound <= 90:
        yield "[{0}%,{1}%)".format(lower_bound, lower_bound + 10)
        lower_bound += 10
    yield "100%"
| |
| |
| # Parse the JSON representing the debug statistics, and create a |
| # LocationStats object. |
def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on `binary` and build a LocationStats.

    opts   -- parsed program options; only_variables, only_formal_parameters
              and ignore_debug_entry_values select which JSON fields are
              read.
    binary -- path of the file handed to llvm-dwarfdump.

    Returns a LocationStats object. A field whose JSON value is "overflowed"
    is stored as TAINT_VALUE (a warning is printed for each one).

    Prints an error and exits with status 1 when llvm-dwarfdump cannot be
    started, when its output is not a JSON object, or when an expected
    field is missing from the JSON.
    """
    # These stay None unless all variables are being processed.
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    # llvm-dwarfdump is looked up in the directory of this script.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen(
            [llvm_dwarfdump_cmd, "--statistics", binary],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        print("error: Unable to run {}: {}".format(llvm_dwarfdump_cmd, err))
        sys.exit(1)
    cmd_stdout, cmd_stderr = subproc.communicate()

    # Get the JSON and parse it. Only a JSON object can be queried by field
    # name, so anything else is rejected up front.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        json_parsed = None
    if not isinstance(json_parsed, dict):
        print("error: No valid llvm-dwarfdump statistics found.")
        # Forward whatever llvm-dwarfdump itself complained about.
        if cmd_stderr:
            sys.stderr.write(cmd_stderr)
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    def init_field(name):
        # Read one statistic, mapping "overflowed" to TAINT_VALUE.
        if name not in json_parsed:
            print('error: "' + name + '" field not found in the statistics.')
            sys.exit(1)
        if json_parsed[name] == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # The three modes read identically shaped fields that differ only in
    # the name of the variable kind.
    if opts.only_variables:
        # Read the JSON only for local variables.
        kind, sum_kind = "local vars", "local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        kind, sum_kind = "params", "params"
    else:
        # Read the JSON for both local variables and formal parameters.
        kind, sum_kind = "variables", "variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    variables_total_locstats = init_field(
        "#{} processed by location statistics".format(kind)
    )
    variables_scope_bytes_covered = init_field(
        "sum_all_{}(#bytes in parent scope covered by DW_AT_location)".format(
            sum_kind
        )
    )
    variables_scope_bytes = init_field(
        "sum_all_{}(#bytes in parent scope)".format(sum_kind)
    )

    if opts.ignore_debug_entry_values:
        # Discount the bytes that are covered only through entry values.
        variables_scope_bytes_entry_values = init_field(
            "sum_all_{}(#bytes in parent scope covered by "
            "DW_OP_entry_value)".format(sum_kind)
        )
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered -= variables_scope_bytes_entry_values
        bucket_field = (
            "#{} - entry values with {} of parent scope covered by DW_AT_location"
        )
    else:
        bucket_field = "#{} with {} of parent scope covered by DW_AT_location"

    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = init_field(
            bucket_field.format(kind, cov_bucket)
        )

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )
| |
| |
| # Parse the program arguments. |
def parse_program_args(parser):
    """Register the program options on `parser` and parse the command line.

    parser -- an argparse.ArgumentParser to add the arguments to.

    Returns the namespace produced by parser.parse_args().
    """
    # Every option is an off-by-default boolean switch: (flag, help text).
    switches = (
        (
            "--only-variables",
            "calculate the location statistics only for local variables",
        ),
        (
            "--only-formal-parameters",
            "calculate the location statistics only for formal parameters",
        ),
        (
            "--ignore-debug-entry-values",
            "ignore the location statistics on locations with entry values",
        ),
        (
            "--draw-plot",
            "show histogram of location buckets generated (requires matplotlib)",
        ),
        (
            "--compare",
            "compare the debug location coverage on two files provided, "
            "and draw a plot showing the difference (requires "
            "matplotlib)",
        ),
    )
    for flag, description in switches:
        parser.add_argument(
            flag, action="store_true", default=False, help=description
        )

    # One or more input files; the exact count is validated separately.
    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    return parser.parse_args()
| |
| |
| # Verify that the program inputs meet the requirements. |
def verify_program_inputs(opts):
    """Check that the parsed options form a valid invocation.

    opts -- the namespace returned by parse_program_args().

    Returns True when the inputs are acceptable. Otherwise prints an error
    message describing the first problem found and returns False.
    """
    complaint = None
    if len(sys.argv) < 2:
        complaint = "error: Too few arguments."
    elif opts.only_variables and opts.only_formal_parameters:
        complaint = "error: Please use just one --only* option."
    elif not opts.compare and len(opts.file_names) != 1:
        complaint = "error: Please specify only one file to process."
    elif opts.compare and len(opts.file_names) != 2:
        complaint = "error: Please specify two files to process."

    if complaint is not None:
        print(complaint)
        return False

    # Both plotting modes need matplotlib; probe for it up front.
    needs_matplotlib = opts.draw_plot or opts.compare
    if needs_matplotlib:
        try:
            import matplotlib
        except ImportError:
            print("error: matplotlib not found.")
            return False

    return True
| |
| |
def Main():
    """Entry point: parse the options, gather the statistics, report them.

    Exits with status 1 (after printing the help text) when the program
    inputs are invalid, and with status 0 when pretty printing reports that
    there is nothing to show.
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    inputs_ok = verify_program_inputs(opts)
    if not inputs_ok:
        parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between the two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    status = locstats.pretty_print()
    if status == -1:
        sys.exit(0)
| |
| |
if __name__ == "__main__":
    # Main() exits on its own when it hits an error, so returning from it
    # means the run succeeded: finish with exit status 0.
    Main()
    raise SystemExit(0)