scripts/analyze-project-deps.py - llvm-project/lldb - Git at Google

 #!/usr/bin/env python

 import argparse
 import itertools
 import os
 import re
 import sys
 from collections import defaultdict

 from use_lldb_suite import lldb_root

 parser = argparse.ArgumentParser(
     description='Analyze LLDB project #include dependencies.')
 parser.add_argument('--show-counts', default=False, action='store_true',
     help='When true, show the number of dependencies from each subproject')
 parser.add_argument('--discover-cycles', default=False, action='store_true',
     help='When true, find and display all project dependency cycles.  Note,'
          'this option is very slow')

 args = parser.parse_args()

 src_dir = os.path.join(lldb_root, "source")
 inc_dir = os.path.join(lldb_root, "include")

 src_map = {}

 include_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"')

 def is_sublist(small, big):
     it = iter(big)
     return all(c in it for c in small)

 def normalize_host(str):
     if str.startswith("lldb/Host"):
         return "lldb/Host"
     if str.startswith("Plugins"):
         return "lldb/" + str
     if str.startswith("lldb/../../source"):
         return str.replace("lldb/../../source", "lldb")
     return str

 def scan_deps(this_dir, file):
     global src_map
     deps = {}
     this_dir = normalize_host(this_dir)
     if this_dir in src_map:
         deps = src_map[this_dir]

     with open(file) as f:
         for line in list(f):
             m = include_regex.match(line)
             if m is None:
                 continue
             relative = m.groups()[0].rstrip("/")
             if relative == this_dir:
                 continue
             relative = normalize_host(relative)
             if relative in deps:
                 deps[relative] += 1
             elif relative != this_dir:
                 deps[relative] = 1
     if this_dir not in src_map and len(deps) > 0:
         src_map[this_dir] = deps

 for (base, dirs, files) in os.walk(inc_dir):
     dir = os.path.basename(base)
     relative = os.path.relpath(base, inc_dir)
     inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
     relative = relative.replace("\\", "/")
     for inc in inc_files:
         inc_path = os.path.join(base, inc)
         scan_deps(relative, inc_path)

 for (base, dirs, files) in os.walk(src_dir):
     dir = os.path.basename(base)
     relative = os.path.relpath(base, src_dir)
     src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
     norm_base_path = os.path.normpath(os.path.join("lldb", relative))
     norm_base_path = norm_base_path.replace("\\", "/")
     for src in src_files:
         src_path = os.path.join(base, src)
         scan_deps(norm_base_path, src_path)
     pass

 def is_existing_cycle(path, cycles):
     # If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
     # then we don't just want to check for an occurrence of A -> B -> C in the
     # list of known cycles, but every possible rotation of A -> B -> C.  For
     # example, if we previously encountered B -> C -> A (with an implicit -> B
     # at the end), then A -> B -> C is also a cycle.  This is an important
     # optimization which reduces the search space by multiple orders of
     # magnitude.
     for i in range(0,len(path)):
         if any(is_sublist(x, path) for x in cycles):
             return True
         path = [path[-1]] + path[0:-1]
     return False

 def expand(path_queue, path_lengths, cycles, src_map):
     # We do a breadth first search, to make sure we visit all paths in order
     # of ascending length.  This is an important optimization to make sure that
     # short cycles are discovered first, which will allow us to discard longer
     # cycles which grow the search space exponentially the longer they get.
     while len(path_queue) > 0:
         cur_path = path_queue.pop(0)
         if is_existing_cycle(cur_path, cycles):
             continue

         next_len = path_lengths.pop(0) + 1
         last_component = cur_path[-1]

         for item in src_map.get(last_component, []):
             if item.startswith("clang"):
                 continue

             if item in cur_path:
                 # This is a cycle.  Minimize it and then check if the result is
                 # already in the list of cycles.  Insert it (or not) and then
                 # exit.
                 new_index = cur_path.index(item)
                 cycle = cur_path[new_index:]
                 if not is_existing_cycle(cycle, cycles):
                     cycles.append(cycle)
                 continue

             path_lengths.append(next_len)
             path_queue.append(cur_path + [item])
     pass

 cycles = []

 path_queue = [[x] for x in iter(src_map)]
 path_lens = [1] * len(path_queue)

 items = list(src_map.items())
 items.sort(key = lambda A : A[0])

 for (path, deps) in items:
     print(path + ":")
     sorted_deps = list(deps.items())
     if args.show_counts:
         sorted_deps.sort(key = lambda A: (A[1], A[0]))
         for dep in sorted_deps:
             print("\t{} [{}]".format(dep[0], dep[1]))
     else:
         sorted_deps.sort(key = lambda A: A[0])
         for dep in sorted_deps:
             print("\t{}".format(dep[0]))

 def iter_cycles(cycles):
     global src_map
     for cycle in cycles:
         cycle.append(cycle[0])
         zipper = list(zip(cycle[0:-1], cycle[1:]))
         result = [(x, src_map[x][y], y) for (x,y) in zipper]
         total = 0
         smallest = result[0][1]
         for (first, value, last) in result:
             total += value
             smallest = min(smallest, value)
         yield (total, smallest, result)

 if args.discover_cycles:
     print("Analyzing cycles...")

     expand(path_queue, path_lens, cycles, src_map)

     average = sum([len(x)+1 for x in cycles]) / len(cycles)

     print("Found {} cycles.  Average cycle length = {}.".format(len(cycles), average))
     counted = list(iter_cycles(cycles))
     if args.show_counts:
         counted.sort(key = lambda A: A[0])
         for (total, smallest, cycle) in counted:
             sys.stdout.write("{} deps to break: ".format(total))
             sys.stdout.write(cycle[0][0])
             for (first, count, last) in cycle:
                 sys.stdout.write(" [{}->] {}".format(count, last))
             sys.stdout.write("\n")
     else:
         for cycle in cycles:
             cycle.append(cycle[0])
             print(" -> ".join(cycle))

     print("Analyzing islands...")
     islands = []
     outgoing_counts = defaultdict(int)
     incoming_counts = defaultdict(int)
     for (total, smallest, cycle) in counted:
         for (first, count, last) in cycle:
             outgoing_counts[first] += count
             incoming_counts[last] += count
     for cycle in cycles:
         this_cycle = set(cycle)
         disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
         overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
         islands = disjoints + [set.union(this_cycle, *overlaps)]
     print("Found {} disjoint cycle islands...".format(len(islands)))
     for island in islands:
         print("Island ({} elements)".format(len(island)))
         sorted = []
         for node in island:
             sorted.append((node, incoming_counts[node], outgoing_counts[node]))
         sorted.sort(key = lambda x: x[1]+x[2])
         for (node, inc, outg) in sorted:
             print("  {} [{} in, {} out]".format(node, inc, outg))
     sys.stdout.flush()
 pass
	#!/usr/bin/env python

	import argparse
	import itertools
	import os
	import re
	import sys
	from collections import defaultdict

	from use_lldb_suite import lldb_root

	parser = argparse.ArgumentParser(
	description='Analyze LLDB project #include dependencies.')
	parser.add_argument('--show-counts', default=False, action='store_true',
	help='When true, show the number of dependencies from each subproject')
	parser.add_argument('--discover-cycles', default=False, action='store_true',
	help='When true, find and display all project dependency cycles. Note,'
	'this option is very slow')

	args = parser.parse_args()

	src_dir = os.path.join(lldb_root, "source")
	inc_dir = os.path.join(lldb_root, "include")

	src_map = {}

	include_regex = re.compile('#include \"((lldb\|Plugins\|clang)(./)+).\"')

	def is_sublist(small, big):
	it = iter(big)
	return all(c in it for c in small)

	def normalize_host(str):
	if str.startswith("lldb/Host"):
	return "lldb/Host"
	if str.startswith("Plugins"):
	return "lldb/" + str
	if str.startswith("lldb/../../source"):
	return str.replace("lldb/../../source", "lldb")
	return str

	def scan_deps(this_dir, file):
	global src_map
	deps = {}
	this_dir = normalize_host(this_dir)
	if this_dir in src_map:
	deps = src_map[this_dir]

	with open(file) as f:
	for line in list(f):
	m = include_regex.match(line)
	if m is None:
	continue
	relative = m.groups()[0].rstrip("/")
	if relative == this_dir:
	continue
	relative = normalize_host(relative)
	if relative in deps:
	deps[relative] += 1
	elif relative != this_dir:
	deps[relative] = 1
	if this_dir not in src_map and len(deps) > 0:
	src_map[this_dir] = deps

	for (base, dirs, files) in os.walk(inc_dir):
	dir = os.path.basename(base)
	relative = os.path.relpath(base, inc_dir)
	inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
	relative = relative.replace("\\", "/")
	for inc in inc_files:
	inc_path = os.path.join(base, inc)
	scan_deps(relative, inc_path)

	for (base, dirs, files) in os.walk(src_dir):
	dir = os.path.basename(base)
	relative = os.path.relpath(base, src_dir)
	src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
	norm_base_path = os.path.normpath(os.path.join("lldb", relative))
	norm_base_path = norm_base_path.replace("\\", "/")
	for src in src_files:
	src_path = os.path.join(base, src)
	scan_deps(norm_base_path, src_path)
	pass

	def is_existing_cycle(path, cycles):
	# If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
	# then we don't just want to check for an occurrence of A -> B -> C in the
	# list of known cycles, but every possible rotation of A -> B -> C. For
	# example, if we previously encountered B -> C -> A (with an implicit -> B
	# at the end), then A -> B -> C is also a cycle. This is an important
	# optimization which reduces the search space by multiple orders of
	# magnitude.
	for i in range(0,len(path)):
	if any(is_sublist(x, path) for x in cycles):
	return True
	path = [path[-1]] + path[0:-1]
	return False

	def expand(path_queue, path_lengths, cycles, src_map):
	# We do a breadth first search, to make sure we visit all paths in order
	# of ascending length. This is an important optimization to make sure that
	# short cycles are discovered first, which will allow us to discard longer
	# cycles which grow the search space exponentially the longer they get.
	while len(path_queue) > 0:
	cur_path = path_queue.pop(0)
	if is_existing_cycle(cur_path, cycles):
	continue

	next_len = path_lengths.pop(0) + 1
	last_component = cur_path[-1]

	for item in src_map.get(last_component, []):
	if item.startswith("clang"):
	continue

	if item in cur_path:
	# This is a cycle. Minimize it and then check if the result is
	# already in the list of cycles. Insert it (or not) and then
	# exit.
	new_index = cur_path.index(item)
	cycle = cur_path[new_index:]
	if not is_existing_cycle(cycle, cycles):
	cycles.append(cycle)
	continue

	path_lengths.append(next_len)
	path_queue.append(cur_path + [item])
	pass

	cycles = []

	path_queue = [[x] for x in iter(src_map)]
	path_lens = [1] * len(path_queue)

	items = list(src_map.items())
	items.sort(key = lambda A : A[0])

	for (path, deps) in items:
	print(path + ":")
	sorted_deps = list(deps.items())
	if args.show_counts:
	sorted_deps.sort(key = lambda A: (A[1], A[0]))
	for dep in sorted_deps:
	print("\t{} [{}]".format(dep[0], dep[1]))
	else:
	sorted_deps.sort(key = lambda A: A[0])
	for dep in sorted_deps:
	print("\t{}".format(dep[0]))

	def iter_cycles(cycles):
	global src_map
	for cycle in cycles:
	cycle.append(cycle[0])
	zipper = list(zip(cycle[0:-1], cycle[1:]))
	result = [(x, src_map[x][y], y) for (x,y) in zipper]
	total = 0
	smallest = result[0][1]
	for (first, value, last) in result:
	total += value
	smallest = min(smallest, value)
	yield (total, smallest, result)

	if args.discover_cycles:
	print("Analyzing cycles...")

	expand(path_queue, path_lens, cycles, src_map)

	average = sum([len(x)+1 for x in cycles]) / len(cycles)

	print("Found {} cycles. Average cycle length = {}.".format(len(cycles), average))
	counted = list(iter_cycles(cycles))
	if args.show_counts:
	counted.sort(key = lambda A: A[0])
	for (total, smallest, cycle) in counted:
	sys.stdout.write("{} deps to break: ".format(total))
	sys.stdout.write(cycle[0][0])
	for (first, count, last) in cycle:
	sys.stdout.write(" [{}->] {}".format(count, last))
	sys.stdout.write("\n")
	else:
	for cycle in cycles:
	cycle.append(cycle[0])
	print(" -> ".join(cycle))

	print("Analyzing islands...")
	islands = []
	outgoing_counts = defaultdict(int)
	incoming_counts = defaultdict(int)
	for (total, smallest, cycle) in counted:
	for (first, count, last) in cycle:
	outgoing_counts[first] += count
	incoming_counts[last] += count
	for cycle in cycles:
	this_cycle = set(cycle)
	disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
	overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
	islands = disjoints + [set.union(this_cycle, *overlaps)]
	print("Found {} disjoint cycle islands...".format(len(islands)))
	for island in islands:
	print("Island ({} elements)".format(len(island)))
	sorted = []
	for node in island:
	sorted.append((node, incoming_counts[node], outgoing_counts[node]))
	sorted.sort(key = lambda x: x[1]+x[2])
	for (node, inc, outg) in sorted:
	print(" {} [{} in, {} out]".format(node, inc, outg))
	sys.stdout.flush()
	pass