diff options
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/combo-layer | 391 |
1 files changed, 389 insertions, 2 deletions
diff --git a/scripts/combo-layer b/scripts/combo-layer index 9297d5973d..92525cac14 100755 --- a/scripts/combo-layer +++ b/scripts/combo-layer | |||
| @@ -28,6 +28,9 @@ import subprocess | |||
| 28 | import tempfile | 28 | import tempfile |
| 29 | import ConfigParser | 29 | import ConfigParser |
| 30 | import re | 30 | import re |
| 31 | import copy | ||
| 32 | import pipes | ||
| 33 | import shutil | ||
| 31 | from collections import OrderedDict | 34 | from collections import OrderedDict |
| 32 | from string import Template | 35 | from string import Template |
| 33 | 36 | ||
| @@ -653,8 +656,7 @@ def action_update(conf, args): | |||
| 653 | action_pull(conf, ['arg0'] + components) | 656 | action_pull(conf, ['arg0'] + components) |
| 654 | 657 | ||
| 655 | if history: | 658 | if history: |
| 656 | logger.error("update with history not implemented yet") | 659 | update_with_history(conf, components, revisions, repos) |
| 657 | sys.exit(1) | ||
| 658 | else: | 660 | else: |
| 659 | update_with_patches(conf, components, revisions, repos) | 661 | update_with_patches(conf, components, revisions, repos) |
| 660 | 662 | ||
| @@ -888,6 +890,391 @@ def action_splitpatch(conf, args): | |||
| 888 | else: | 890 | else: |
| 889 | logger.info(patch_filename) | 891 | logger.info(patch_filename) |
| 890 | 892 | ||
def update_with_history(conf, components, revisions, repos):
    '''Update all components with full history.

    Works by importing all commits reachable from a component's
    current head revision.  If those commits are rooted in an already
    imported commit, their content gets mixed with the content of the
    combined repo of that commit (new or modified files overwritten,
    removed files removed).

    The last commit is an artificial merge commit that merges all the
    updated components into the combined repository.

    The HEAD ref only gets updated at the very end.  All intermediate work
    happens in a separate work tree and index below .git; the objects
    created there will get garbage collected by git eventually after a
    failure.

    Parameters:
    conf       -- configuration object; conf.repos[name] is read and its
                  'last_revision' entry updated, conf.update() is called
                  at the end
    components -- passed through to conf_commit_msg() and commit_conf_file()
    revisions  -- mapping from component name to the revision to import;
                  a missing entry means "use the component's branch"
    repos      -- iterable of component names to process
    '''
    # Remember current HEAD and what we need to add to it.
    head = runcmd("git rev-parse HEAD").strip()
    additional_heads = {}

    # Track the mapping between original commit and commit in the
    # combined repo. We do not have to distinguish between components,
    # because commit hashes are different anyway. Often we can
    # skip find_revs() entirely (for example, when all new commits
    # are derived from the last imported revision).
    #
    # Using "head" (typically the merge commit) instead of the actual
    # commit for the component leads to a nicer history in the combined
    # repo.
    old2new_revs = {}
    for name in repos:
        repo = conf.repos[name]
        revision = repo['last_revision']
        if revision:
            old2new_revs[revision] = head

    def add_p(parents):
        '''Insert -p before each entry.'''
        parameters = []
        for p in parents:
            parameters.append('-p')
            parameters.append(p)
        return parameters

    # Do all intermediate work with a separate work dir and index,
    # chosen via env variables (can't use "git worktree", it is too
    # new). This is useful (no changes to current work tree unless the
    # update succeeds) and required (otherwise we end up temporarily
    # removing the combo-layer hooks that we currently use when
    # importing a new component).
    #
    # Not cleaned up after a failure at the moment.
    wdir = os.path.join(os.getcwd(), ".git", "combo-layer")
    windex = wdir + ".index"
    if os.path.isdir(wdir):
        shutil.rmtree(wdir)
    os.mkdir(wdir)
    wenv = copy.deepcopy(os.environ)
    wenv["GIT_WORK_TREE"] = wdir
    wenv["GIT_INDEX_FILE"] = windex
    # This one turned out to be needed in practice.
    wenv["GIT_OBJECT_DIRECTORY"] = os.path.join(os.getcwd(), ".git", "objects")
    wargs = {"destdir": wdir, "env": wenv}

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        hook = repo.get('hook', None)
        largs = {"destdir": ldir, "env": None}
        file_include = repo.get('file_filter', '').split()
        file_include.sort() # make sure that short entries like '.' come first.
        file_exclude = repo.get('file_exclude', '').split()

        def include_file(file):
            '''Return True if the file is selected by the file_filter setting.'''
            if not file_include:
                # No explicit filter set, include file.
                return True
            for filter in file_include:
                if filter == '.':
                    # Another special case: include current directory and thus all files.
                    return True
                if os.path.commonprefix((filter, file)) == filter:
                    # Included in directory or direct file match.
                    # NOTE(review): commonprefix() compares character by
                    # character, so "src" also selects "srcfoo/x" - confirm
                    # that this is intended.
                    return True
                # Check for wildcard match *with* allowing * to match /, i.e.
                # src/*.c does match src/foobar/*.c. That's not how it is done elsewhere
                # when passing the filtering to "git archive", but it is unclear what
                # the intended semantic is (the comment on file_exclude that "append a * wildcard
                # at the end" to match the full content of a directory implies that
                # slashes are indeed not special), so here we simply do what's easy to
                # implement in Python.
                logger.debug('fnmatch(%s, %s)' % (file, filter))
                if fnmatch.fnmatchcase(file, filter):
                    return True
            return False

        def exclude_file(file):
            '''Return True if the file matches one of the file_exclude patterns.'''
            for filter in file_exclude:
                if fnmatch.fnmatchcase(file, filter):
                    return True
            return False

        def file_filter(files):
            '''Clean up file list so that only included files remain.'''
            # Modifies the list in place, because that is what
            # apply_commit() relies on.
            index = 0
            while index < len(files):
                file = files[index]
                if not include_file(file) or exclude_file(file):
                    del files[index]
                else:
                    index += 1


        # Generate the revision list.
        logger.info("Analyzing commits from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)

        last_revision = repo['last_revision']
        rev_list_args = "--full-history --sparse --topo-order --reverse"
        if not last_revision:
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            rev_list_args = rev_list_args + ' ' + top_revision
        else:
            if not check_rev_branch(name, ldir, last_revision, branch):
                sys.exit(1)
            rev_list_args = "%s %s..%s" % (rev_list_args, last_revision, top_revision)

            # By definition, the current HEAD contains the latest imported
            # commit of each component. We use that as initial mapping even
            # though the commits do not match exactly because
            # a) it always works (in contrast to find_revs, which relies on special
            #    commit messages)
            # b) it is faster than find_revs, which will only be called on demand
            #    and can be skipped entirely in most cases
            # c) last but not least, the combined history looks nicer when all
            #    new commits are rooted in the same merge commit
            old2new_revs[last_revision] = head

        # We care about all commits (--full-history and --sparse) and
        # we want to reconstruct the topology and thus do not care
        # about ordering by time (--topo-order). We ask for the ones
        # we need to import first to be listed first (--reverse).
        revs = runcmd("git rev-list %s" % rev_list_args, **largs).split()
        logger.debug("To be imported: %s" % revs)
        # Now 'revs' contains all revisions reachable from the top revision.
        # All revisions derived from the 'last_revision' definitely are new,
        # whereas the others may or may not have been imported before. For
        # a linear history in the component, that second set will be empty.
        # To distinguish between them, we also get the shorter list
        # of revisions starting at the ancestor.
        if last_revision:
            ancestor_revs = runcmd("git rev-list --ancestry-path %s" % rev_list_args, **largs).split()
        else:
            ancestor_revs = []
        logger.debug("Ancestors: %s" % ancestor_revs)

        # Now import each revision.
        logger.info("Importing commits from %s..." % name)
        def import_rev(rev):
            '''Import one component commit (and, recursively, its parents).

            Returns the hash of the corresponding commit in the combined
            repository and records it in old2new_revs.
            '''
            global scanned_revs

            # If it is part of the new commits, we definitely need
            # to import it. Otherwise we need to check, we might have
            # imported it before. If it was imported and we merely
            # fail to find it because commit messages did not track
            # the mapping, then we end up importing it again. So
            # combined repos using "updating with history" really should
            # enable the "From ... rev:" commit header modifications.
            if rev not in ancestor_revs and rev not in old2new_revs and not scanned_revs:
                logger.debug("Revision %s triggers log analysis." % rev)
                find_revs(old2new_revs, head)
                scanned_revs = True
            new_rev = old2new_revs.get(rev, None)
            if new_rev:
                return new_rev

            # If the commit is not in the original list of revisions
            # to be imported, then it must be a parent of one of those
            # commits and it was skipped during earlier imports or not
            # found. Importing such merge commits leads to very ugly
            # history (long cascade of merge commits which all point
            # to older commits) when switching from "update via
            # patches" to "update with history".
            #
            # We can avoid importing merge commits if all non-merge commits
            # reachable from it were already imported. In that case we
            # can root the new commits in the current head revision.
            def is_imported(prev):
                parents = runcmd("git show --no-patch --pretty=format:%P " + prev, **largs).split()
                if len(parents) > 1:
                    for p in parents:
                        if not is_imported(p):
                            logger.debug("Must import %s because %s is not imported." % (rev, p))
                            return False
                    return True
                elif prev in old2new_revs:
                    return True
                else:
                    logger.debug("Must import %s because %s is not imported." % (rev, prev))
                    return False
            if rev not in revs and is_imported(rev):
                old2new_revs[rev] = head
                return head

            # Need to import rev. Collect some information about it.
            logger.debug("Importing %s" % rev)
            (parents, author_name, author_email, author_timestamp, body) = \
                runcmd("git show --no-patch --pretty=format:%P%x00%an%x00%ae%x00%at%x00%B " + rev, **largs).split(chr(0))
            parents = parents.split()
            if parents:
                # Arbitrarily pick the first parent as base. It may or may not have
                # been imported before. For example, if the parent is a merge commit
                # and previously the combined repository used patching as update
                # method, then the actual merge commit parent never was imported.
                # To cover this, we recursively import parents.
                parent = parents[0]
                new_parent = import_rev(parent)
                # Clean index and working tree. TODO: can we combine this and the
                # next into one command with less file IO?
                # "git reset --hard" does not work, it changes HEAD of the parent
                # repo, which we wanted to avoid. Probably need to keep
                # track of the rev that corresponds to the index and use apply_commit().
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)
                # Update index and working tree to match the parent.
                runcmd("git checkout -q -f %s ." % new_parent, **wargs)
            else:
                parent = None
                # Clean index and working tree.
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)

            # Modify index and working tree such that it mirrors the commit.
            apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=file_filter)

            # Now commit.
            new_tree = runcmd("git write-tree", **wargs).strip()
            env = copy.deepcopy(wenv)
            env['GIT_AUTHOR_NAME'] = author_name
            env['GIT_AUTHOR_EMAIL'] = author_email
            env['GIT_AUTHOR_DATE'] = author_timestamp
            if hook:
                # Need to turn the verbatim commit message into something resembling a patch header
                # for the hook.
                with tempfile.NamedTemporaryFile(delete=False) as patch:
                    patch.write('Subject: [PATCH] ')
                    patch.write(body)
                    patch.write('\n---\n')
                # The file is closed (and thus flushed) here, before the
                # hook gets to see it. It was created with delete=False,
                # so we have to remove it ourselves, also when the hook
                # fails.
                try:
                    runcmd([hook, patch.name, rev, name])
                    with open(patch.name) as f:
                        body = f.read()[len('Subject: [PATCH] '):][:-len('\n---\n')]
                finally:
                    os.unlink(patch.name)

            # We can skip non-merge commits that did not change any files. Those are typically
            # the result of file filtering, although they could also have been introduced
            # intentionally upstream, in which case we drop some information here.
            if len(parents) == 1:
                parent_rev = import_rev(parents[0])
                old_tree = runcmd("git show -s --pretty=format:%T " + parent_rev, **wargs).strip()
                commit = old_tree != new_tree
                if not commit:
                    new_rev = parent_rev
            else:
                commit = True
            if commit:
                new_rev = runcmd("git commit-tree".split() + add_p([import_rev(p) for p in parents]) +
                                 ["-m", body, new_tree],
                                 env=env).strip()
            old2new_revs[rev] = new_rev

            return new_rev

        if revs:
            for rev in revs:
                import_rev(rev)
            # Remember how to update our current head. New components get added,
            # updated components get the delta between current head and the updated component
            # applied.
            additional_heads[old2new_revs[revs[-1]]] = head if repo['last_revision'] else None
            repo['last_revision'] = revs[-1]

    # Now construct the final merge commit. We create the tree by
    # starting with the head and applying the changes from each
    # component's imported head revision.
    if additional_heads:
        runcmd("git reset --hard", **wargs)
        for rev, base in additional_heads.iteritems():
            apply_commit(base, rev, wargs, wargs, None)

        # Commit with all component branches as parents as well as the previous head.
        logger.info("Writing final merge commit...")
        msg = conf_commit_msg(conf, components)
        new_tree = runcmd("git write-tree", **wargs).strip()
        new_rev = runcmd("git commit-tree".split() +
                         add_p([head] + additional_heads.keys()) +
                         ["-m", msg, new_tree],
                         **wargs).strip()
        # And done! This is the first time we change the HEAD in the actual work tree.
        runcmd("git reset --hard %s" % new_rev)

        # Update and stage the (potentially modified)
        # combo-layer.conf, but do not commit separately.
        for name in repos:
            repo = conf.repos[name]
            rev = repo['last_revision']
            conf.update(name, "last_revision", rev)
        if commit_conf_file(conf, components, False):
            # Must augment the previous commit.
            runcmd("git commit --amend -C HEAD")
| 1206 | |||
# Set to True by update_with_history() once find_revs() has been run,
# so that the (slow) log analysis happens at most once.
scanned_revs = False
def find_revs(old2new, head):
    '''Extend the mapping from original commit hash to commit hash in
    the combined repo by scanning the commit messages reachable from
    head. Depends on the "From ... rev: ..." convention.

    Entries already present in old2new are never overwritten.'''
    logger.info("Analyzing log messages to find previously imported commits...")
    known_before = len(old2new)
    rev_pattern = re.compile(r'From .* rev: ([a-fA-F0-9]+)')
    fields = runcmd("git log --grep='From .* rev: [a-fA-F0-9][a-fA-F0-9]*' --pretty=format:%H%x00%B%x00 " + head).split(chr(0))
    # The output alternates between commit hash and commit message;
    # a trailing field without partner gets ignored.
    for combined_rev, message in zip(fields[0::2], fields[1::2]):
        # Use the last one, in the unlikely case there are more than one.
        original_rev = rev_pattern.findall(message)[-1]
        old2new.setdefault(original_rev, combined_rev.strip())
    logger.info("Found %d additional commits, leading to: %s" % (len(old2new) - known_before, old2new))
| 1222 | |||
| 1223 | |||
def apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=None):
    '''Compare revision against parent, remove files deleted in the
    commit, re-write new or modified ones. Moves them into dest_dir.
    Optionally filters files.

    Parameters:
    parent      -- revision to diff against; if empty or None, all files
                   of rev get copied
    rev         -- revision whose content gets applied
    largs       -- runcmd() keyword arguments ("destdir", "env") for the
                   repository that rev is read from
    wargs       -- runcmd() keyword arguments for the work tree that gets
                   modified; wargs["destdir"] is its directory
    dest_dir    -- directory inside the work tree for the files, empty or
                   None for the top level
    file_filter -- optional function which removes unwanted entries from
                   a list of file names in place
    '''
    if not dest_dir:
        dest_dir = "."
    # -r recurses into sub-directories, given is the full overview of
    # what changed.  We do not care about copy/edits or renames, so we
    # can disable those with --no-renames (but we still parse them,
    # because it was not clear from git documentation whether C and M
    # lines can still occur).
    logger.debug("Applying changes between %s and %s in %s" % (parent, rev, largs["destdir"]))
    delete = []
    update = []
    if parent:
        # Apply delta.
        changes = runcmd("git diff-tree --no-commit-id --no-renames --name-status -r --raw -z %s %s" % (parent, rev), **largs).split(chr(0))
        # The output alternates between status and file name. The empty
        # string after the final NUL has no partner and gets dropped by zip().
        for status, name in zip(*[iter(changes)]*2):
            if status[0] in "ACMRT":
                update.append(name)
            elif status[0] in "D":
                delete.append(name)
            else:
                logger.error("Unknown status %s of file %s in revision %s" % (status, name, rev))
                sys.exit(1)
    else:
        # Copy all files. The output is NUL-terminated, so splitting leaves
        # an empty string at the end which must not be treated as a file
        # name: as a pathspec it is either rejected by git or selects
        # everything, thus bypassing the file filter.
        listing = runcmd("git ls-tree -r --name-only -z %s" % rev, **largs).split(chr(0))
        update.extend([name for name in listing if name])

    # Include/exclude files as defined in the component config.
    # Both updated and deleted file lists get filtered, because it might happen
    # that a file gets excluded, pulled from a different component, and then the
    # excluded file gets deleted. In that case we must keep the copy.
    if file_filter:
        file_filter(update)
        file_filter(delete)

    # We export into a tar archive here and extract with tar because it is simple (no
    # need to implement file and symlink writing ourselves) and gives us some degree
    # of parallel IO. The downside is that we have to pass the list of files via
    # command line parameters - hopefully there will never be too many at once.
    if update:
        target = os.path.join(wargs["destdir"], dest_dir)
        if not os.path.isdir(target):
            os.makedirs(target)
        runcmd("git archive %s %s | tar -C %s -xf -" % (rev, ' '.join([pipes.quote(x) for x in update]), pipes.quote(target)), **largs)
        runcmd("git add -f".split() + [os.path.join(dest_dir, x) for x in update], **wargs)
    if delete:
        runcmd("git rm -f --ignore-unmatch".split() + [os.path.join(dest_dir, x) for x in delete], **wargs)
| 1277 | |||
| 891 | def action_error(conf, args): | 1278 | def action_error(conf, args): |
| 892 | logger.info("invalid action %s" % args[0]) | 1279 | logger.info("invalid action %s" % args[0]) |
| 893 | 1280 | ||
