#!/bin/python
###############################################################################
#
# Diff two licenses.xml files. There are two cases:
# * Two arguments
#    In this case, the two arguments are the two files to compare: the new
#    file first, then the previous file.
#
# * No arguments
#     In this case, the license files are specified by the following
#     parameters in pardoc-distro.xml:
#     - prev_baseline
#     - prev_lic_file
#     - new_lic_file
#
# The result is presented as three sets:
# 1) Removed
#   Packages present in the previous file, but not in the new.
#
# 2) Added
#   Packages present in the new file, but not in the previous.
#
# 3) Changed
#   Packages present in both files, but with different versions. If more than
#   one version of a package is included, then any difference in the version
#   set causes the package to be listed as changed.
#   E.g.
#    (v1) -> (v2)
#    (v1, v2) -> (v2, v3)
#
# Note that packages with unchanged versions are not listed.
#
# The output is presented as XML code printed to stdout. A summary is printed
# to STDERR at the end.
#
###############################################################################
import os
import sys
import subprocess as sp
import xml.etree.ElementTree as ET
import re
# Top-level directory of the git work tree, as printed by
# "git rev-parse --show-toplevel" (trailing whitespace removed). This runs
# git when the module is loaded and raises if the command fails.
repo_root = sp.check_output(("git", "rev-parse", "--show-toplevel")).rstrip()

# Directory containing this script (symlinks resolved) and, relative to it,
# the parameter file that get_param() reads.
script_root = os.path.split(os.path.realpath(__file__))[0]
param_file = os.path.join(script_root, "docsrc_common/pardoc-distro.xml")
def get_param(param_name):
    """Return the value of a parameter from the pardoc-distro.xml file.

    The file named by the module-level ``param_file`` is scanned line by
    line. A line matches when ``param_name`` is followed, later on the same
    line, by ">" and at least one character other than "<" or ">"; those
    characters are the value. The first matching line wins.

    ``param_name`` is inserted into the regular expression unescaped.

    NOTE(review): lines keep their trailing newline and the captured class
    accepts it, so the returned text can end with (or consist only of) a
    newline -- confirm against the real parameter file layout.

    Returns the captured text, or None when no line matches.
    """
    matcher = re.compile("%s.*>([^<>]+)" % param_name)
    with open(param_file) as params:
        # Lazy scan: stops reading at the first line that matches.
        attempts = (matcher.search(text) for text in params)
        first_hit = next((hit for hit in attempts if hit), None)
    if first_hit is None:
        return None
    return first_hit.group(1)
def get_pkgs(file_spec):
    """Read the package/version table from a licenses XML document.

    ``file_spec`` is either a path to a file on disk or, when it contains a
    colon, a git object spec ("<rev>:<path>") whose content is fetched with
    "git show".

    The first "section" element whose ``id`` attribute is
    "licenses_packages" is located, then the first "informaltable" element
    directly below it. The rows are the children of the last child of the
    table's first child; in each row the text of the first cell is the
    package name and the text of the second cell is a version.

    Returns a tuple ``(names, versions)``: ``names`` is the set of package
    names and ``versions`` maps each name to the set of its versions.
    Returns None when the section or the table is not found.
    """
    if ":" in file_spec:
        raw = sp.check_output(("git", "show", file_spec))
    else:
        # Binary mode gives the same type as the git output above and lets
        # the XML parser apply the encoding declared in the document.
        with open(file_spec, "rb") as handle:
            raw = handle.read()

    # ET can't handle some special quotes: typographic quote entities are not
    # among the predefined XML entities, so the parser rejects them. Map them
    # to the predefined &quot; before parsing.
    # NOTE(review): the replacement table had been damaged (it could not be
    # unpacked into pairs); the entity names here are a reconstruction --
    # confirm against the license files.
    for entity in (b"&ldquo;", b"&rdquo;"):
        raw = raw.replace(entity, b"&quot;")
    root = ET.fromstring(raw)

    section = next(
        (sec for sec in root.iter("section")
         if sec.get("id") == "licenses_packages"),
        None,
    )
    if section is None:
        return None

    table = next(
        (child for child in section if child.tag == "informaltable"),
        None,
    )
    if table is None:
        return None

    # First child of the table, then its last child, holds the data rows.
    body = table[0][-1]
    versions = {}
    for row in body:
        # A package may appear in several rows, one per version.
        versions.setdefault(row[0].text, set()).add(row[1].text)
    return set(versions), versions
#----------------------------------------
if len(sys.argv) == 3:
    new_file, prev_file = sys.argv[1:3]
elif len(sys.argv) == 1:
    prev_baseline = get_param("prev_baseline")
    prev_lic_file = get_param("prev_lic_file")
    new_lic_file  = get_param("new_lic_file")
    if not (prev_baseline and prev_lic_file and new_lic_file):
        print ''
        print ''
        print ''
        print '  Changes in the Set of Provided Package'
        print '  '
        print '    N/A. No previous baseline defined.'
        print '  '
        print ''
        exit(0)
    new_file = os.path.relpath(os.path.join(repo_root, new_lic_file))
    prev_file = "%s:%s" % (prev_baseline, prev_lic_file)
else:
    sys.stderr.write("Usage:\n")
    sys.stderr.write("  1) %s\n" % sys.argv[0])
    sys.stderr.write("  2) %s " % sys.argv[0])
    sys.stderr.write(" \n")
    sys.stderr.write("\n")
    sys.stderr.write("In case 1, the files are specified using the following\n")
    sys.stderr.write("parameters in pardoc-distro.xml:\n")
    sys.stderr.write("  - prev_baseline\n")
    sys.stderr.write("  - prev_lic_file\n")
    sys.stderr.write("  - new_lic_file\n")
    exit()
sys.stderr.write("New license file  : %s\n" % new_file)
sys.stderr.write("Prev license file : %s\n" % prev_file)

# get_pkgs() returns None when a document has no "licenses_packages" section
# or no table in it. Report that explicitly instead of failing on the tuple
# unpacking with an unrelated-looking TypeError.
_parsed = []
for _spec in (prev_file, new_file):
    _pkgs = get_pkgs(_spec)
    if _pkgs is None:
        sys.stderr.write(
            "Error: no \"licenses_packages\" package table found in %s\n"
            % _spec)
        sys.exit(1)
    _parsed.append(_pkgs)
(old_pset, old_pdict), (new_pset, new_pdict) = _parsed

# Package names only in the new file, only in the previous file, and in both.
added   = new_pset - old_pset
removed = old_pset - new_pset
common  = old_pset & new_pset
# A package in both files counts as changed when its version sets differ.
changed = [p for p in common if old_pdict[p] != new_pdict[p]]
# Write the report to stdout: a heading and introduction, then three tables
# (added, removed and changed packages), each with a header row followed by
# one row per package in sorted name order.
# NOTE(review): the file header says the output is XML, yet the literals
# below contain only indentation and text. Several of them are also split
# across two physical lines (opening quote on one line, closing quote alone
# on the next), which is not a valid single-quoted string. The markup looks
# to have been lost from this copy -- restore it before running.
print ''
print ''
print ''
print '  Changes in the Set of Provided Package'
print '  '
print '    This section describes changes in the provided packages.'
print '  '
# Added packages: names found only in the new file, with their versions
# from the new file.
print '  '
print '    Added Packages'
print '    '
print '      '
print '        '
print '          '
print '            Package'
print '            Version(s)'
print '          
'
print '        '
print '        '
for p in sorted(list(added)):
    print '          '
    print '            %s' % p
    print '            %s' % ", ".join(sorted(new_pdict[p]))
    print '          
'
print '        '
print '      '
print '    '
print '  '
# Removed packages: names found only in the previous file, with their
# versions from the previous file.
print '  '
print '    Removed Packages'
print '    '
print '      '
print '        '
print '          '
print '            Package'
print '            Version(s)'
print '          
'
print '        '
print '        '
for p in sorted(list(removed)):
    print '          '
    print '            %s' % p
    print '            %s' % ", ".join(sorted(old_pdict[p]))
    print '          
'
print '        '
print '      '
print '    '
print '  '
# Changed packages: names in both files whose version sets differ, showing
# the previous versions and then the new versions.
print '  '
print '    Changed Package Versions'
print '    '
print '      '
print '        '
print '          '
print '            Package'
print '            Previous Version(s)'
print '            New Version(s)'
print '          
'
print '        '
print '        '
for p in sorted(list(changed)):
    print '          '
    print '            %s' % p
    print '            %s' % ", ".join(sorted(old_pdict[p]))
    print '            %s' % ", ".join(sorted(new_pdict[p]))
    print '          
'
print '        '
print '      '
print '    '
print '  '
print ''
# Final tally, written to stderr so it stays separate from the report that
# goes to stdout. Each entry pairs a line format with the count it shows.
_summary = (
    ("  Prev file : %3d\n", len(old_pset)),
    ("  New file  : %3d\n", len(new_pset)),
    ("  Added     : %3d\n", len(added)),
    ("  Removed   : %3d\n", len(removed)),
    ("  Changed   : %3d\n", len(changed)),
    # Packages present in both files whose version sets are equal.
    ("  Unchanged : %3d\n", len(common) - len(changed)),
)
sys.stderr.write("Package Summary:\n")
for _fmt, _count in _summary:
    sys.stderr.write(_fmt % _count)
sys.stderr.write("Done\n")