blob: 8c377860653e0646fd9cfcb15eb170587a3e3936 [file] [log] [blame]
#!/usr/bin/env python2.7
from __future__ import print_function
# Help text for the command line; passed to argparse as the program description.
desc = '''Generate the difference of two YAML files into a new YAML file (works on
a pair of directories too). A new attribute 'Added' is set to True or False
depending on whether the entry is added or removed from the first input to the
next.
This tool requires PyYAML.'''
import yaml
# Try to use the C parser.
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
import optrecord
import argparse
from collections import defaultdict
from multiprocessing import cpu_count, Pool
import os, os.path
import fnmatch
def find_files(dir_or_file):
    """Return the list of input files designated by *dir_or_file*.

    Args:
        dir_or_file: Path to a single file, or to a directory to search.

    Returns:
        ``[dir_or_file]`` when the path is an existing regular file; the
        file name is not checked against any pattern in that case.
        Otherwise the directory tree rooted at the path is walked and the
        joined path of every file whose name matches ``*.opt.yaml`` is
        returned, in ``os.walk`` order. The list is empty when nothing
        matches (including when the path is not a directory).
    """
    if os.path.isfile(dir_or_file):
        return [dir_or_file]

    matches = []
    # Sub-directory names are not needed: os.walk already descends into them.
    for root, _subdirs, names in os.walk(dir_or_file):
        matches.extend(
            os.path.join(root, name)
            for name in fnmatch.filter(names, "*.opt.yaml"))
    return matches
if __name__ == '__main__':
    # Command line: two inputs (each a YAML file or a directory of them),
    # an optional job count and an optional output path.
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('yaml_dir_or_file_1')
    parser.add_argument('yaml_dir_or_file_2')
    parser.add_argument(
        '--jobs',
        '-j',
        default=cpu_count(),
        type=int,
        help='Max job count (defaults to current CPU count)')
    parser.add_argument('--output', '-o', default='diff.opt.yaml')
    args = parser.parse_args()

    # With a single job, use the builtin map and avoid creating a process
    # pool at all; otherwise hand the pool's map to optrecord.
    if args.jobs == 1:
        pmap = map
    else:
        pool = Pool(processes=args.jobs)
        pmap = pool.map

    files1 = find_files(args.yaml_dir_or_file_1)
    files2 = find_files(args.yaml_dir_or_file_2)

    # Only the first element of gather_results' 3-tuple is used here; it is
    # a mapping whose values are the remark objects.
    all_remarks1, _, _ = optrecord.gather_results(pmap, files1)
    all_remarks2, _, _ = optrecord.gather_results(pmap, files2)

    # Build each value set once and diff them in both directions.
    # NOTE(review): this relies on remarks being hashable and comparing equal
    # across the two inputs -- confirm against optrecord.
    remarks1 = set(all_remarks1.values())
    remarks2 = set(all_remarks2.values())
    added = remarks2 - remarks1
    removed = remarks1 - remarks2

    # Tag every remark with the direction of the change before dumping.
    for r in added:
        r.Added = True
    for r in removed:
        r.Added = False

    # open() in a with-block replaces the Python-2-only file() builtin and
    # guarantees the output is flushed and closed even if dumping fails.
    with open(args.output, 'w') as stream:
        yaml.dump_all(added | removed, stream)