|  | #!/usr/bin/env python3 | 
|  | # | 
# This tool is a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
|  | # | 
|  |  | 
|  | from __future__ import print_function | 
|  | import argparse | 
|  | import os | 
|  | import sys | 
|  | from json import loads | 
|  | from math import ceil | 
|  | from collections import OrderedDict | 
|  | from subprocess import Popen, PIPE | 
|  |  | 
# This special value is used to mark statistics that overflowed.
|  | TAINT_VALUE = "tainted" | 
|  |  | 
# Initialize the plot: set the title, label both axes and configure the
# tick labels of the pyplot-like object passed in as `plt`.
def init_plot(plt):
    title_style = {"fontweight": "bold"}
    plt.title("Debug Location Statistics", **title_style)

    plt.xlabel("location buckets")
    plt.ylabel("number of variables in the location buckets")

    # Bucket names are long, so the x tick labels are tilted and shrunk.
    x_tick_style = {"rotation": 45, "fontsize": "x-small"}
    plt.xticks(**x_tick_style)
    plt.yticks()
|  |  | 
|  |  | 
# Finalize the plot: add the legend and horizontal grid lines, write the
# figure to "locstats.png" and tell the user where it went.
def finish_plot(plt):
    plt.legend()

    grid_style = dict(
        color="grey", which="major", axis="y", linestyle="-", linewidth=0.3
    )
    plt.grid(**grid_style)

    output_file = "locstats.png"
    plt.savefig(output_file)
    print('The plot was saved within "{}".'.format(output_file))
|  |  | 
|  |  | 
# Holds the debug location statistics of a single file and renders them
# either as a text table or as a matplotlib plot.
#
# Any of the numeric fields may hold TAINT_VALUE instead of a number; a
# percentage derived from a tainted field is reported as TAINT_VALUE too.
class LocationStats:
    # file_name                     -- name used to label the plots.
    # variables_total               -- total number of source variables, or
    #                                  None when only a subset is processed.
    # variables_total_locstats      -- number of variables the location
    #                                  statistics were computed for.
    # variables_with_loc            -- number of variables with a location,
    #                                  or None when only a subset is
    #                                  processed.
    # variables_scope_bytes_covered -- scope bytes covered by a location.
    # variables_scope_bytes         -- scope bytes in total.
    # variables_coverage_map        -- ordered mapping from a coverage bucket
    #                                  name to the number of variables in it.
    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    # Return `part` as an integer percentage of `whole`.
    #
    # Returns TAINT_VALUE if either operand is tainted and 0 if `whole` is
    # zero (instead of raising ZeroDivisionError).
    @staticmethod
    def _percentage(part, whole):
        if part == TAINT_VALUE or whole == TAINT_VALUE:
            return TAINT_VALUE
        if whole == 0:
            return 0
        # NOTE: ceil() wraps the numerator only, so for integer inputs the
        # result is the truncated percentage. This is kept as is so that the
        # reported numbers do not change.
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage, i.e. the percentage of scope bytes that
    # are covered by a location (TAINT_VALUE if an input overflowed).
    def get_pc_coverage(self):
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    #
    # Returns -1 (after printing a notice) when there are no scope bytes at
    # all, and 0 once the table has been printed.
    def pretty_print(self):
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        heavy_rule = " ================================================="
        light_rule = " -------------------------------------------------"

        pc_ranges_covered = self.get_pc_coverage()

        print(heavy_rule)
        print("            Debug Location Statistics       ")
        print(heavy_rule)
        print("     cov%           samples         percentage(~)  ")
        print(light_rule)
        for cov_bucket in coverage_buckets():
            samples = self.variables_coverage_map[cov_bucket]
            percentage = self._percentage(samples, self.variables_total_locstats)
            # A tainted row carries strings, which the integer ("d")
            # presentation type cannot format, so it gets the plain layout.
            if (
                samples == TAINT_VALUE
                or self.variables_total_locstats == TAINT_VALUE
            ):
                row_layout = "   {0:10}     {1:8}              {2:3}%"
            else:
                row_layout = "   {0:10}     {1:8d}              {2:3d}%"
            print(row_layout.format(cov_bucket, samples, percentage))
        print(heavy_rule)
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = self._percentage(
                self.variables_with_loc, self.variables_total
            )
            print(light_rule)
            print(" -total availability: " + str(total_availability) + "%")
        print(heavy_rule)

        return 0

    # Draw a bar plot representing the location buckets and hand it to
    # finish_plot() for saving. Requires matplotlib.
    def draw_plot(self):
        from matplotlib import pyplot as plt

        # Materialize the dict views so matplotlib receives plain sequences.
        bucket_names = list(self.variables_coverage_map.keys())
        bucket_counts = list(self.variables_coverage_map.values())

        plt.figure(figsize=(12, 8))
        init_plot(plt)
        plt.bar(
            range(len(bucket_names)),
            bucket_counts,
            align="center",
            tick_label=bucket_names,
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(self.get_pc_coverage()),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference: the bars of both objects are drawn side by side for
    # every bucket. Requires matplotlib.
    def draw_location_diff(self, locstats_to_compare):
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        # Materialize the dict views so matplotlib receives plain sequences.
        counts = list(self.variables_coverage_map.values())
        counts_to_compare = list(
            locstats_to_compare.variables_coverage_map.values()
        )

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # A positive width with align="edge" puts these bars to the right of
        # the tick, the negative width below puts the other file's bars to
        # the left of it.
        ax.bar(
            range(len(counts)),
            counts,
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            range(len(counts_to_compare)),
            counts_to_compare,
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        # One coverage text box per file, stacked in the top-left corner.
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        notes = (
            (0.88, self.file_name, pc_ranges_covered),
            (0.83, locstats_to_compare.file_name, pc_ranges_covered_to_compare),
        )
        for y_position, name, coverage in notes:
            plt.text(
                0.02,
                y_position,
                "{} PC ranges covered: {}%".format(name, coverage),
                transform=plt.gca().transAxes,
                fontsize=12,
                verticalalignment="top",
                bbox=props,
            )

        finish_plot(plt)
|  |  | 
|  |  | 
# Define the location buckets: generate the bucket names in ascending
# order, from "0%" through the ten-point ranges up to "100%".
def coverage_buckets():
    yield "0%"
    yield "(0%,10%)"
    lower = 10
    while lower <= 90:
        yield "[{0}%,{1}%)".format(lower, lower + 10)
        lower += 10
    yield "100%"
|  |  | 
|  |  | 
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
#
# Runs the llvm-dwarfdump found next to this script with --statistics on
# `binary` and reads the fields selected by `opts` (only_variables,
# only_formal_parameters, ignore_debug_entry_values) from its JSON output.
# Prints an error and exits with status 1 if the tool cannot be started or
# does not produce valid JSON. A field reported as "overflowed" is stored as
# TAINT_VALUE after printing a warning.
def parse_locstats(opts, binary):
    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen(
            [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        # E.g. llvm-dwarfdump is missing or not executable.
        print("error: Unable to run " + llvm_dwarfdump_cmd + ": " + str(err))
        sys.exit(1)
    cmd_stdout, cmd_stderr = subproc.communicate()

    # Get the JSON and parse it.
    try:
        json_parsed = loads(cmd_stdout)
    except (ValueError, TypeError):
        # Forward whatever llvm-dwarfdump complained about; it usually
        # explains why there is no JSON to read.
        if cmd_stderr:
            print(cmd_stderr.rstrip(), file=sys.stderr)
        print("error: No valid llvm-dwarfdump statistics found.")
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    # Read one field, mapping the "overflowed" marker to TAINT_VALUE.
    def init_field(name):
        value = json_parsed[name]
        if value == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return value

    # The three modes read the same set of fields; only the noun used in the
    # per-variable keys and the prefix of the byte-sum keys differ.
    variables_total = None
    variables_with_loc = None
    if opts.only_variables:
        # Read the JSON only for local variables.
        noun, sum_prefix = "local vars", "sum_all_local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        noun, sum_prefix = "params", "sum_all_params"
    else:
        # Read the JSON for both local variables and formal parameters.
        noun, sum_prefix = "variables", "sum_all_variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    variables_total_locstats = init_field(
        "#{} processed by location statistics".format(noun)
    )
    variables_scope_bytes_covered = init_field(
        "{}(#bytes in parent scope covered by DW_AT_location)".format(sum_prefix)
    )
    variables_scope_bytes = init_field(
        "{}(#bytes in parent scope)".format(sum_prefix)
    )

    if opts.ignore_debug_entry_values:
        # Leave the bytes that are only covered through entry values out of
        # the covered total, unless either number overflowed.
        variables_scope_bytes_entry_values = init_field(
            "{}(#bytes in parent scope covered by DW_OP_entry_value)".format(
                sum_prefix
            )
        )
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered -= variables_scope_bytes_entry_values
        cov_category_template = (
            "#{} - entry values with {} of parent scope covered by DW_AT_location"
        )
    else:
        cov_category_template = "#{} with {} of parent scope covered by DW_AT_location"

    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = init_field(
            cov_category_template.format(noun, cov_bucket)
        )

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )
|  |  | 
|  |  | 
# Parse the program arguments: register the boolean switches and the
# positional file list on `parser`, then return the parsed options.
def parse_program_args(parser):
    # Every switch is a plain on/off flag that defaults to off.
    switches = (
        (
            "--only-variables",
            "calculate the location statistics only for local variables",
        ),
        (
            "--only-formal-parameters",
            "calculate the location statistics only for formal parameters",
        ),
        (
            "--ignore-debug-entry-values",
            "ignore the location statistics on locations with entry values",
        ),
        (
            "--draw-plot",
            "show histogram of location buckets generated (requires matplotlib)",
        ),
        (
            "--compare",
            "compare the debug location coverage on two files provided, "
            "and draw a plot showing the difference  (requires matplotlib)",
        ),
    )
    for flag, description in switches:
        parser.add_argument(
            flag, action="store_true", default=False, help=description
        )

    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    return parser.parse_args()
|  |  | 
|  |  | 
# Verify that the program inputs meet the requirements.
#
# Prints the first problem found and returns False; returns True when the
# options are consistent and, if a plot was requested, matplotlib can be
# imported.
def verify_program_inputs(opts):
    if len(sys.argv) < 2:
        problem = "error: Too few arguments."
    elif opts.only_variables and opts.only_formal_parameters:
        problem = "error: Please use just one --only* option."
    elif not opts.compare and len(opts.file_names) != 1:
        problem = "error: Please specify only one file to process."
    elif opts.compare and len(opts.file_names) != 2:
        problem = "error: Please specify two files to process."
    else:
        problem = None

    if problem is not None:
        print(problem)
        return False

    # matplotlib is only needed by the plotting modes.
    if not (opts.draw_plot or opts.compare):
        return True

    try:
        import matplotlib
    except ImportError:
        print("error: matplotlib not found.")
        return False

    return True
|  |  | 
|  |  | 
# Entry point of the tool: parse and check the command line, collect the
# statistics of the first file and then either compare them with a second
# file, plot them, or print them.
def Main():
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    if locstats.pretty_print() == -1:
        sys.exit(0)
|  |  | 
|  |  | 
if __name__ == "__main__":
    # Run the tool only when executed as a script. Main() exits on its own
    # for the errors it detects, so reaching the next line means success.
    Main()
    raise SystemExit(0)