| import difflib |
| import functools |
| import getopt |
| import io |
| import locale |
| import os |
| import sys |
| |
| import util |
| from util import to_string |
| |
| class DiffFlags(): |
| def __init__(self): |
| self.ignore_all_space = False |
| self.ignore_space_change = False |
| self.unified_diff = False |
| self.num_context_lines = 3 |
| self.recursive_diff = False |
| self.strip_trailing_cr = False |
| |
| def getDirTree(path, basedir=""): |
| # Tree is a tuple of form (dirname, child_trees). |
| # An empty dir has child_trees = [], a file has child_trees = None. |
| child_trees = [] |
| for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)): |
| for child_dir in child_dirs: |
| child_trees.append(getDirTree(child_dir, dirname)) |
| for filename in files: |
| child_trees.append((filename, None)) |
| return path, sorted(child_trees) |
| |
| def compareTwoFiles(flags, filepaths): |
| filelines = [] |
| for file in filepaths: |
| if file == "-": |
| stdin_fileno = sys.stdin.fileno() |
| with os.fdopen(os.dup(stdin_fileno), 'rb') as stdin_bin: |
| filelines.append(stdin_bin.readlines()) |
| else: |
| with open(file, 'rb') as file_bin: |
| filelines.append(file_bin.readlines()) |
| |
| try: |
| return compareTwoTextFiles(flags, filepaths, filelines, |
| locale.getpreferredencoding(False)) |
| except UnicodeDecodeError: |
| try: |
| return compareTwoTextFiles(flags, filepaths, filelines, "utf-8") |
| except: |
| return compareTwoBinaryFiles(flags, filepaths, filelines) |
| |
| def compareTwoBinaryFiles(flags, filepaths, filelines): |
| exitCode = 0 |
| if hasattr(difflib, 'diff_bytes'): |
| # python 3.5 or newer |
| diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], |
| filelines[1], filepaths[0].encode(), |
| filepaths[1].encode(), |
| n = flags.num_context_lines) |
| diffs = [diff.decode(errors="backslashreplace") for diff in diffs] |
| else: |
| # python 2.7 |
| if flags.unified_diff: |
| func = difflib.unified_diff |
| else: |
| func = difflib.context_diff |
| diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1], |
| n = flags.num_context_lines) |
| |
| for diff in diffs: |
| sys.stdout.write(to_string(diff)) |
| exitCode = 1 |
| return exitCode |
| |
| def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding): |
| filelines = [] |
| for lines_bin in filelines_bin: |
| lines = [] |
| for line_bin in lines_bin: |
| line = line_bin.decode(encoding=encoding) |
| lines.append(line) |
| filelines.append(lines) |
| |
| exitCode = 0 |
| def compose2(f, g): |
| return lambda x: f(g(x)) |
| |
| f = lambda x: x |
| if flags.strip_trailing_cr: |
| f = compose2(lambda line: line.replace('\r\n', '\n'), f) |
| if flags.ignore_all_space or flags.ignore_space_change: |
| ignoreSpace = lambda line, separator: \ |
| separator.join(line.split()) + "\n" |
| ignoreAllSpaceOrSpaceChange = functools.partial(ignoreSpace, separator='' if flags.ignore_all_space else ' ') |
| f = compose2(ignoreAllSpaceOrSpaceChange, f) |
| |
| for idx, lines in enumerate(filelines): |
| filelines[idx]= [f(line) for line in lines] |
| |
| func = difflib.unified_diff if flags.unified_diff else difflib.context_diff |
| for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1], |
| n = flags.num_context_lines): |
| sys.stdout.write(to_string(diff)) |
| exitCode = 1 |
| return exitCode |
| |
| def printDirVsFile(dir_path, file_path): |
| if os.path.getsize(file_path): |
| msg = "File %s is a directory while file %s is a regular file" |
| else: |
| msg = "File %s is a directory while file %s is a regular empty file" |
| sys.stdout.write(msg % (dir_path, file_path) + "\n") |
| |
| def printFileVsDir(file_path, dir_path): |
| if os.path.getsize(file_path): |
| msg = "File %s is a regular file while file %s is a directory" |
| else: |
| msg = "File %s is a regular empty file while file %s is a directory" |
| sys.stdout.write(msg % (file_path, dir_path) + "\n") |
| |
| def printOnlyIn(basedir, path, name): |
| sys.stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name)) |
| |
| def compareDirTrees(flags, dir_trees, base_paths=["", ""]): |
| # Dirnames of the trees are not checked, it's caller's responsibility, |
| # as top-level dirnames are always different. Base paths are important |
| # for doing os.walk, but we don't put it into tree's dirname in order |
| # to speed up string comparison below and while sorting in getDirTree. |
| left_tree, right_tree = dir_trees[0], dir_trees[1] |
| left_base, right_base = base_paths[0], base_paths[1] |
| |
| # Compare two files or report file vs. directory mismatch. |
| if left_tree[1] is None and right_tree[1] is None: |
| return compareTwoFiles(flags, |
| [os.path.join(left_base, left_tree[0]), |
| os.path.join(right_base, right_tree[0])]) |
| |
| if left_tree[1] is None and right_tree[1] is not None: |
| printFileVsDir(os.path.join(left_base, left_tree[0]), |
| os.path.join(right_base, right_tree[0])) |
| return 1 |
| |
| if left_tree[1] is not None and right_tree[1] is None: |
| printDirVsFile(os.path.join(left_base, left_tree[0]), |
| os.path.join(right_base, right_tree[0])) |
| return 1 |
| |
| # Compare two directories via recursive use of compareDirTrees. |
| exitCode = 0 |
| left_names = [node[0] for node in left_tree[1]] |
| right_names = [node[0] for node in right_tree[1]] |
| l, r = 0, 0 |
| while l < len(left_names) and r < len(right_names): |
| # Names are sorted in getDirTree, rely on that order. |
| if left_names[l] < right_names[r]: |
| exitCode = 1 |
| printOnlyIn(left_base, left_tree[0], left_names[l]) |
| l += 1 |
| elif left_names[l] > right_names[r]: |
| exitCode = 1 |
| printOnlyIn(right_base, right_tree[0], right_names[r]) |
| r += 1 |
| else: |
| exitCode |= compareDirTrees(flags, |
| [left_tree[1][l], right_tree[1][r]], |
| [os.path.join(left_base, left_tree[0]), |
| os.path.join(right_base, right_tree[0])]) |
| l += 1 |
| r += 1 |
| |
| # At least one of the trees has ended. Report names from the other tree. |
| while l < len(left_names): |
| exitCode = 1 |
| printOnlyIn(left_base, left_tree[0], left_names[l]) |
| l += 1 |
| while r < len(right_names): |
| exitCode = 1 |
| printOnlyIn(right_base, right_tree[0], right_names[r]) |
| r += 1 |
| return exitCode |
| |
| def main(argv): |
| if sys.platform == "win32": |
| if hasattr(sys.stdout, 'buffer'): |
| # python 3 |
| sys.stdout = io.TextIOWrapper(sys.stdout.buffer, newline='\n') |
| else: |
| # python 2.7 |
| import msvcrt |
| msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) |
| args = argv[1:] |
| try: |
| opts, args = getopt.gnu_getopt(args, "wbuU:r", ["strip-trailing-cr"]) |
| except getopt.GetoptError as err: |
| sys.stderr.write("Unsupported: 'diff': %s\n" % str(err)) |
| sys.exit(1) |
| |
| flags = DiffFlags() |
| filelines, filepaths, dir_trees = ([] for i in range(3)) |
| for o, a in opts: |
| if o == "-w": |
| flags.ignore_all_space = True |
| elif o == "-b": |
| flags.ignore_space_change = True |
| elif o == "-u": |
| flags.unified_diff = True |
| elif o.startswith("-U"): |
| flags.unified_diff = True |
| try: |
| flags.num_context_lines = int(a) |
| if flags.num_context_lines < 0: |
| raise ValueException |
| except: |
| sys.stderr.write("Error: invalid '-U' argument: {}\n" |
| .format(a)) |
| sys.exit(1) |
| elif o == "-r": |
| flags.recursive_diff = True |
| elif o == "--strip-trailing-cr": |
| flags.strip_trailing_cr = True |
| else: |
| assert False, "unhandled option" |
| |
| if len(args) != 2: |
| sys.stderr.write("Error: missing or extra operand\n") |
| sys.exit(1) |
| |
| exitCode = 0 |
| try: |
| for file in args: |
| if file != "-" and not os.path.isabs(file): |
| file = os.path.realpath(os.path.join(os.getcwd(), file)) |
| |
| if flags.recursive_diff: |
| if file == "-": |
| sys.stderr.write("Error: cannot recursively compare '-'\n") |
| sys.exit(1) |
| dir_trees.append(getDirTree(file)) |
| else: |
| filepaths.append(file) |
| |
| if not flags.recursive_diff: |
| exitCode = compareTwoFiles(flags, filepaths) |
| else: |
| exitCode = compareDirTrees(flags, dir_trees) |
| |
| except IOError as err: |
| sys.stderr.write("Error: 'diff' command failed, %s\n" % str(err)) |
| exitCode = 1 |
| |
| sys.exit(exitCode) |
| |
| if __name__ == "__main__": |
| main(sys.argv) |