| #!/usr/bin/env python |
| # |
# This tool is a debug location coverage calculator.
# It parses the output of llvm-dwarfdump --statistics and reports it
# in a more human-readable form.
| # |
| |
| from __future__ import print_function |
| import argparse |
| import os |
| import sys |
| from json import loads |
| from math import ceil |
| from collections import OrderedDict |
| from subprocess import Popen, PIPE |
| |
# Sentinel stored in place of a numeric statistic whose JSON value was
# reported as 'overflowed'. Code must compare a statistic against this
# value before doing arithmetic on it.
TAINT_VALUE = "tainted"
| |
# Initialize the plot.
def init_plot(plt):
    """Set the title, axis labels and tick styling on *plt*.

    Args:
        plt: a pyplot-like object exposing title(), xlabel(), ylabel(),
            xticks() and yticks().
    """
    plt.title('Debug Location Statistics', fontweight='bold')

    axis_labels = (
        (plt.xlabel, 'location buckets'),
        (plt.ylabel, 'number of variables in the location buckets'),
    )
    for set_label, text in axis_labels:
        set_label(text)

    # The bucket names are long, so the x tick labels are tilted and shrunk.
    x_tick_style = dict(rotation=45, fontsize='x-small')
    plt.xticks(**x_tick_style)
    plt.yticks()
| |
# Finalize the plot.
def finish_plot(plt):
    """Add the legend and grid to *plt*, save it and report the file name.

    Args:
        plt: a pyplot-like object exposing legend(), grid() and savefig().
    """
    output_name = 'locstats.png'
    grid_style = dict(color='grey', which='major', axis='y', linestyle='-',
                      linewidth=0.3)

    plt.legend()
    plt.grid(**grid_style)
    plt.savefig(output_name)
    print('The plot was saved within "{}".'.format(output_name))
| |
# Holds the debug location statistics.
class LocationStats:
    """Debug location statistics of one binary, with printing and plotting.

    Any numeric attribute may hold TAINT_VALUE instead of a number; every
    computation checks for that before doing arithmetic.

    Attributes:
        file_name: name used to label the statistics in plots.
        variables_total: denominator of the total availability, or None
            when the total availability is not to be reported.
        variables_total_locstats: number of variables processed by the
            location statistics (denominator of the bucket percentages).
        variables_with_loc: numerator of the total availability, or None
            when the total availability is not to be reported.
        scope_bytes_covered: numerator of the PC ranges coverage.
        scope_bytes: denominator of the PC ranges coverage.
        variables_coverage_map: mapping from coverage bucket name to the
            number of variables in that bucket.
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    @staticmethod
    def _percentage(part, whole):
        """Return part/whole as an integer percentage, or 0 if whole is 0.

        ceil() is applied to the scaled numerator only, so int() truncates
        the quotient. This keeps the rounding of the reported values
        unchanged; the table header marks the percentages as approximate.
        """
        if whole == 0:
            return 0
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage.
    def get_pc_coverage(self):
        """Return the percentage of scope bytes covered by locations.

        Returns:
            TAINT_VALUE if either operand is tainted, 0 if there are no
            scope bytes at all, otherwise an integer percentage.
        """
        if self.scope_bytes_covered == TAINT_VALUE or \
           self.scope_bytes == TAINT_VALUE:
            return TAINT_VALUE
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    def pretty_print(self):
        """Print the bucket table and the summary lines to standard output.

        Returns:
            -1 if there are no scope bytes (only a notice is printed),
            0 otherwise.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        total_is_tainted = self.variables_total_locstats == TAINT_VALUE
        separator = ' ================================================='

        print(separator)
        print(' Debug Location Statistics ')
        print(separator)
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in coverage_buckets():
            samples = self.variables_coverage_map[cov_bucket]
            if samples == TAINT_VALUE or total_is_tainted:
                percentage = TAINT_VALUE
            else:
                percentage = self._percentage(samples,
                                              self.variables_total_locstats)
            # Without an explicit type code, integers are still
            # right-aligned and the taint marker is accepted as well, so
            # one format serves both kinds of row.
            print(' {0:10} {1:8} {2:3}%'.format(cov_bucket, samples,
                                                percentage))
        print(separator)
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # Only if we are processing all the variables output the total
        # availability. The totals are None otherwise; a legitimate count
        # of 0 must still be reported, hence the explicit None checks.
        if self.variables_total is not None and \
           self.variables_with_loc is not None:
            if self.variables_total == TAINT_VALUE or \
               self.variables_with_loc == TAINT_VALUE:
                total_availability = TAINT_VALUE
            else:
                total_availability = self._percentage(self.variables_with_loc,
                                                      self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(separator)

        return 0

    # Draw a plot representing the location buckets.
    def draw_plot(self):
        """Draw a bar chart of the location buckets and save it.

        Requires matplotlib, which is imported on demand.
        """
        from matplotlib import pyplot as plt

        # Materialize the dict views so matplotlib receives real sequences.
        bucket_names = list(self.variables_coverage_map.keys())
        bucket_counts = list(self.variables_coverage_map.values())
        buckets = range(len(bucket_counts))

        plt.figure(figsize=(12, 8))
        init_plot(plt)
        plt.bar(buckets, bucket_counts, align='center',
                tick_label=bucket_names,
                label='variables of {}'.format(self.file_name))

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    def draw_location_diff(self, locstats_to_compare):
        """Draw both objects' buckets side by side and save the plot.

        Requires matplotlib, which is imported on demand.

        Args:
            locstats_to_compare: the LocationStats object to plot next to
                this one.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        # Materialize the dict views so matplotlib receives real sequences.
        bucket_counts = list(self.variables_coverage_map.values())
        bucket_counts_to_compare = \
            list(locstats_to_compare.variables_coverage_map.values())
        buckets = range(len(bucket_counts))
        buckets_to_compare = range(len(bucket_counts_to_compare))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # Opposite-signed widths with align='edge' put the two bars of a
        # bucket on either side of the same tick.
        ax.bar(buckets, bucket_counts, align='edge', width=0.4,
               label='variables of {}'.format(self.file_name))
        ax.bar(buckets_to_compare, bucket_counts_to_compare,
               color='r', align='edge', width=-0.4,
               label='variables of {}'.format(locstats_to_compare.file_name))
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.88,
                 '{} PC ranges covered: {}%'.format(self.file_name,
                                                    pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.text(0.02, 0.83,
                 '{} PC ranges covered: {}%'.format(
                     locstats_to_compare.file_name,
                     pc_ranges_covered_to_compare),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)
| |
# Define the location buckets.
def coverage_buckets():
    """Yield the coverage bucket names in ascending order.

    The sequence is '0%', '(0%,10%)', the nine ten-point-wide half-open
    ranges from '[10%,20%)' to '[90%,100%)', and finally '100%'.
    """
    yield '0%'
    yield '(0%,10%)'
    lower = 10
    while lower <= 90:
        upper = lower + 10
        yield '[{0}%,{1}%)'.format(lower, upper)
        lower = upper
    yield '100%'
| |
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on *binary* and parse its output.

    The llvm-dwarfdump executable is looked up in the directory that
    contains this script.

    Args:
        opts: parsed program options; only_variables,
            only_formal_parameters and ignore_debug_entry_values are read.
        binary: path of the file to collect the statistics for.

    Returns:
        A LocationStats object. The variable totals used for the total
        availability are filled in only when neither --only-* option is
        given; they are None otherwise.

    Raises:
        SystemExit: with status 1 if the tool's output is not valid JSON.
        KeyError: if an expected statistic is missing from the JSON.
    """
    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    universal_newlines=True)
    cmd_stdout, cmd_stderr = subproc.communicate()

    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it. Only a decoding failure is treated as
    # "no statistics"; KeyboardInterrupt and the like must propagate.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    def init_field(name):
        # Overflowed statistics are replaced by the taint marker so that
        # later arithmetic on them can be skipped.
        if json_parsed[name] == 'overflowed':
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # These stay None unless all the variables are processed.
    variables_total = None
    variables_with_loc = None

    # The three modes read the same set of statistics; only the kind name
    # and the name of the sum differ between them.
    if opts.only_variables:
        # Read the JSON only for local variables.
        kind, sum_name = 'local vars', 'sum_all_local_vars'
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        kind, sum_name = 'params', 'sum_all_params'
    else:
        # Read the JSON for both local variables and formal parameters.
        kind, sum_name = 'variables', 'sum_all_variables'
        variables_total = init_field('#source variables')
        variables_with_loc = init_field('#source variables with location')

    variables_total_locstats = \
        init_field('#{} processed by location statistics'.format(kind))
    variables_scope_bytes_covered = \
        init_field('{}(#bytes in parent scope covered '
                   'by DW_AT_location)'.format(sum_name))
    variables_scope_bytes = \
        init_field('{}(#bytes in parent scope)'.format(sum_name))

    if opts.ignore_debug_entry_values:
        variables_scope_bytes_entry_values = \
            init_field('{}(#bytes in parent scope covered '
                       'by DW_OP_entry_value)'.format(sum_name))
        # Discount the bytes covered only through entry values, unless
        # either operand overflowed.
        if variables_scope_bytes_covered != TAINT_VALUE and \
           variables_scope_bytes_entry_values != TAINT_VALUE:
            variables_scope_bytes_covered = variables_scope_bytes_covered \
                - variables_scope_bytes_entry_values
        bucket_prefix = '#{} - entry values with '.format(kind)
    else:
        bucket_prefix = '#{} with '.format(kind)

    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        cov_category = bucket_prefix + cov_bucket + \
            ' of parent scope covered by DW_AT_location'
        variables_coverage_map[cov_bucket] = init_field(cov_category)

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)
| |
# Parse the program arguments.
def parse_program_args(parser):
    """Register the tool's options on *parser* and parse the command line.

    Args:
        parser: the argparse.ArgumentParser to populate.

    Returns:
        The namespace produced by parser.parse_args().
    """
    # Every option is a plain on/off switch that defaults to off.
    switches = (
        ('--only-variables',
         'calculate the location statistics only for local variables'),
        ('--only-formal-parameters',
         'calculate the location statistics only for formal parameters'),
        ('--ignore-debug-entry-values',
         'ignore the location statistics on locations with '
         'entry values'),
        ('--draw-plot',
         'show histogram of location buckets generated (requires '
         'matplotlib)'),
        ('--compare',
         'compare the debug location coverage on two files provided, '
         'and draw a plot showing the difference (requires '
         'matplotlib)'),
    )
    for flag, description in switches:
        parser.add_argument(flag, action='store_true', default=False,
                            help=description)

    parser.add_argument('file_names', nargs='+', type=str,
                        help='file to process')

    return parser.parse_args()
| |
# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
    """Check the command line for inconsistent or insufficient input.

    The first violated requirement is reported on standard output.

    Args:
        opts: the parsed program options.

    Returns:
        True if the inputs are usable, False otherwise.
    """
    file_count = len(opts.file_names)
    # Ordered (violated, message) pairs; only the first violation is
    # reported.
    requirements = (
        (len(sys.argv) < 2,
         'error: Too few arguments.'),
        (opts.only_variables and opts.only_formal_parameters,
         'error: Please use just one --only* option.'),
        (not opts.compare and file_count != 1,
         'error: Please specify only one file to process.'),
        (opts.compare and file_count != 2,
         'error: Please specify two files to process.'),
    )
    for violated, message in requirements:
        if violated:
            print(message)
            return False

    needs_plotting = opts.draw_plot or opts.compare
    if not needs_plotting:
        return True

    # Plotting is optional, so matplotlib is probed only when required.
    try:
        import matplotlib
    except ImportError:
        print('error: matplotlib not found.')
        return False

    return True
| |
def Main():
    """Entry point: parse the options, collect the statistics, report them.

    Exits with status 1 (after printing the help) when the inputs are
    rejected, and with status 0 when pretty printing finds nothing to
    report.
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    # The first file is always processed, whatever the mode.
    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    if locstats.pretty_print() == -1:
        sys.exit(0)
| |
# Run the tool only when this file is executed as a script, and exit with
# status 0 once Main() returns.
if __name__ == '__main__':
    Main()
    sys.exit(0)