diff options
-rwxr-xr-x | scripts/combo-layer | 391 |
1 files changed, 389 insertions, 2 deletions
diff --git a/scripts/combo-layer b/scripts/combo-layer index 9297d5973d..92525cac14 100755 --- a/scripts/combo-layer +++ b/scripts/combo-layer | |||
@@ -28,6 +28,9 @@ import subprocess | |||
28 | import tempfile | 28 | import tempfile |
29 | import ConfigParser | 29 | import ConfigParser |
30 | import re | 30 | import re |
31 | import copy | ||
32 | import pipes | ||
33 | import shutil | ||
31 | from collections import OrderedDict | 34 | from collections import OrderedDict |
32 | from string import Template | 35 | from string import Template |
33 | 36 | ||
@@ -653,8 +656,7 @@ def action_update(conf, args): | |||
653 | action_pull(conf, ['arg0'] + components) | 656 | action_pull(conf, ['arg0'] + components) |
654 | 657 | ||
655 | if history: | 658 | if history: |
656 | logger.error("update with history not implemented yet") | 659 | update_with_history(conf, components, revisions, repos) |
657 | sys.exit(1) | ||
658 | else: | 660 | else: |
659 | update_with_patches(conf, components, revisions, repos) | 661 | update_with_patches(conf, components, revisions, repos) |
660 | 662 | ||
@@ -888,6 +890,391 @@ def action_splitpatch(conf, args): | |||
888 | else: | 890 | else: |
889 | logger.info(patch_filename) | 891 | logger.info(patch_filename) |
890 | 892 | ||
def update_with_history(conf, components, revisions, repos):
    '''Update all components with full history.

    Works by importing all commits reachable from a component's
    current head revision.  If those commits are rooted in an already
    imported commit, their content gets mixed with the content of the
    combined repo of that commit (new or modified files overwritten,
    removed files removed).

    The last commit is an artificial merge commit that merges all the
    updated components into the combined repository.

    The HEAD ref only gets updated at the very end. All intermediate work
    happens in a worktree which will get garbage collected by git eventually
    after a failure.

    Parameters:
    conf       -- configuration object; conf.repos[name] is the per-component
                  settings mapping ('last_revision', 'local_repo_dir',
                  'dest_dir' and the optional 'branch', 'hook', 'file_filter',
                  'file_exclude' entries are read here), conf.update() records
                  the new 'last_revision'.
    components -- passed through to conf_commit_msg() and commit_conf_file().
    revisions  -- mapping from component name to the revision to import;
                  components without an entry are imported up to their branch.
    repos      -- names of the components to update.

    Exits the process via sys.exit(1) when check_rev_branch() rejects a
    revision.
    '''
    # Remember current HEAD and what we need to add to it.
    head = runcmd("git rev-parse HEAD").strip()
    additional_heads = {}

    # Track the mapping between original commit and commit in the
    # combined repo. We do not have to distinguish between components,
    # because commit hashes are different anyway. Often we can
    # skip find_revs() entirely (for example, when all new commits
    # are derived from the last imported revision).
    #
    # Using "head" (typically the merge commit) instead of the actual
    # commit for the component leads to a nicer history in the combined
    # repo.
    old2new_revs = {}
    for name in repos:
        repo = conf.repos[name]
        revision = repo['last_revision']
        if revision:
            old2new_revs[revision] = head

    def add_p(parents):
        '''Insert -p before each entry.'''
        parameters = []
        for p in parents:
            parameters.append('-p')
            parameters.append(p)
        return parameters

    # Do all intermediate work with a separate work dir and index,
    # chosen via env variables (can't use "git worktree", it is too
    # new). This is useful (no changes to current work tree unless the
    # update succeeds) and required (otherwise we end up temporarily
    # removing the combo-layer hooks that we currently use when
    # importing a new component).
    #
    # Not cleaned up after a failure at the moment.
    wdir = os.path.join(os.getcwd(), ".git", "combo-layer")
    windex = wdir + ".index"
    if os.path.isdir(wdir):
        shutil.rmtree(wdir)
    os.mkdir(wdir)
    wenv = copy.deepcopy(os.environ)
    wenv["GIT_WORK_TREE"] = wdir
    wenv["GIT_INDEX_FILE"] = windex
    # This one turned out to be needed in practice.
    wenv["GIT_OBJECT_DIRECTORY"] = os.path.join(os.getcwd(), ".git", "objects")
    wargs = {"destdir": wdir, "env": wenv}

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        hook = repo.get('hook', None)
        largs = {"destdir": ldir, "env": None}
        file_include = repo.get('file_filter', '').split()
        file_include.sort() # make sure that short entries like '.' come first.
        file_exclude = repo.get('file_exclude', '').split()

        def include_file(path):
            '''Return True if path is selected by the component's file_filter.'''
            if not file_include:
                # No explicit filter set, include file.
                return True
            for pattern in file_include:
                if pattern == '.':
                    # Another special case: include current directory and thus all files.
                    return True
                if os.path.commonprefix((pattern, path)) == pattern:
                    # Included in directory or direct file match.
                    return True
                # Check for wildcard match *with* allowing * to match /, i.e.
                # src/*.c does match src/foobar/*.c. That's not how it is done elsewhere
                # when passing the filtering to "git archive", but it is unclear what
                # the intended semantic is (the comment on file_exclude that "append a * wildcard
                # at the end" to match the full content of a directories implies that
                # slashes are indeed not special), so here we simply do what's easy to
                # implement in Python.
                logger.debug('fnmatch(%s, %s)' % (path, pattern))
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def exclude_file(path):
            '''Return True if path matches one of the file_exclude patterns.'''
            for pattern in file_exclude:
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def file_filter(files):
            '''Clean up file list so that only included files remain.

            The list is modified in place because apply_commit() relies
            on the filtering being visible in the list it passed in.
            '''
            files[:] = [f for f in files
                        if include_file(f) and not exclude_file(f)]

        # Generate the revision list.
        logger.info("Analyzing commits from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)

        last_revision = repo['last_revision']
        rev_list_args = "--full-history --sparse --topo-order --reverse"
        if not last_revision:
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            rev_list_args = rev_list_args + ' ' + top_revision
        else:
            if not check_rev_branch(name, ldir, last_revision, branch):
                sys.exit(1)
            rev_list_args = "%s %s..%s" % (rev_list_args, last_revision, top_revision)

            # By definition, the current HEAD contains the latest imported
            # commit of each component. We use that as initial mapping even
            # though the commits do not match exactly because
            # a) it always works (in contrast to find_revs, which relies on special
            #    commit messages)
            # b) it is faster than find_revs, which will only be called on demand
            #    and can be skipped entirely in most cases
            # c) last but not least, the combined history looks nicer when all
            #    new commits are rooted in the same merge commit
            #
            # Only done when last_revision is set, so that no bogus
            # empty/None key ends up in the mapping.
            old2new_revs[last_revision] = head

        # We care about all commits (--full-history and --sparse) and
        # we want to reconstruct the topology and thus do not care
        # about ordering by time (--topo-order). We ask for the ones
        # we need to import first to be listed first (--reverse).
        revs = runcmd("git rev-list %s" % rev_list_args, **largs).split()
        logger.debug("To be imported: %s" % revs)
        # Now 'revs' contains all revisions reachable from the top revision.
        # All revisions derived from the 'last_revision' definitely are new,
        # whereas the others may or may not have been imported before. For
        # a linear history in the component, that second set will be empty.
        # To distinguish between them, we also get the shorter list
        # of revisions starting at the ancestor.
        if last_revision:
            ancestor_revs = runcmd("git rev-list --ancestry-path %s" % rev_list_args, **largs).split()
        else:
            ancestor_revs = []
        logger.debug("Ancestors: %s" % ancestor_revs)

        # import_rev() tests membership once per (recursive) call, so use
        # sets for that; the ordered lists are kept for iteration/logging.
        revs_set = set(revs)
        ancestor_set = set(ancestor_revs)

        # Now import each revision.
        logger.info("Importing commits from %s..." % name)
        def import_rev(rev):
            '''Import rev (and, recursively, its parents) into the combined
            repo unless already known; return the corresponding commit
            in the combined repo.'''
            global scanned_revs

            # If it is part of the new commits, we definitely need
            # to import it. Otherwise we need to check, we might have
            # imported it before. If it was imported and we merely
            # fail to find it because commit messages did not track
            # the mapping, then we end up importing it again. So
            # combined repos using "updating with history" really should
            # enable the "From ... rev:" commit header modifications.
            if rev not in ancestor_set and rev not in old2new_revs and not scanned_revs:
                logger.debug("Revision %s triggers log analysis." % rev)
                find_revs(old2new_revs, head)
                scanned_revs = True
            new_rev = old2new_revs.get(rev, None)
            if new_rev:
                return new_rev

            # If the commit is not in the original list of revisions
            # to be imported, then it must be a parent of one of those
            # commits and it was skipped during earlier imports or not
            # found. Importing such merge commits leads to very ugly
            # history (long cascade of merge commits which all point
            # to older commits) when switching from "update via
            # patches" to "update with history".
            #
            # We can avoid importing merge commits if all non-merge commits
            # reachable from it were already imported. In that case we
            # can root the new commits in the current head revision.
            def is_imported(prev):
                parents = runcmd("git show --no-patch --pretty=format:%P " + prev, **largs).split()
                if len(parents) > 1:
                    for p in parents:
                        if not is_imported(p):
                            logger.debug("Must import %s because %s is not imported." % (rev, p))
                            return False
                    return True
                elif prev in old2new_revs:
                    return True
                else:
                    logger.debug("Must import %s because %s is not imported." % (rev, prev))
                    return False
            if rev not in revs_set and is_imported(rev):
                old2new_revs[rev] = head
                return head

            # Need to import rev. Collect some information about it.
            logger.debug("Importing %s" % rev)
            (parents, author_name, author_email, author_timestamp, body) = \
                runcmd("git show --no-patch --pretty=format:%P%x00%an%x00%ae%x00%at%x00%B " + rev, **largs).split(chr(0))
            parents = parents.split()
            if parents:
                # Arbitrarily pick the first parent as base. It may or may not have
                # been imported before. For example, if the parent is a merge commit
                # and previously the combined repository used patching as update
                # method, then the actual merge commit parent never was imported.
                # To cover this, we recursively import parents.
                parent = parents[0]
                new_parent = import_rev(parent)
                # Clean index and working tree. TODO: can we combine this and the
                # next into one command with less file IO?
                # "git reset --hard" does not work, it changes HEAD of the parent
                # repo, which we wanted to avoid. Probably need to keep
                # track of the rev that corresponds to the index and use apply_commit().
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)
                # Update index and working tree to match the parent.
                runcmd("git checkout -q -f %s ." % new_parent, **wargs)
            else:
                parent = None
                # Clean index and working tree.
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)

            # Modify index and working tree such that it mirrors the commit.
            apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=file_filter)

            # Now commit.
            new_tree = runcmd("git write-tree", **wargs).strip()
            env = copy.deepcopy(wenv)
            env['GIT_AUTHOR_NAME'] = author_name
            env['GIT_AUTHOR_EMAIL'] = author_email
            env['GIT_AUTHOR_DATE'] = author_timestamp
            if hook:
                # Need to turn the verbatim commit message into something resembling a patch header
                # for the hook. The hook gets the file name and the modified
                # message is read back from that same file afterwards.
                patch = tempfile.NamedTemporaryFile(delete=False)
                try:
                    patch.write('Subject: [PATCH] ')
                    patch.write(body)
                    patch.write('\n---\n')
                    patch.close()
                    runcmd([hook, patch.name, rev, name])
                    with open(patch.name) as f:
                        body = f.read()[len('Subject: [PATCH] '):][:-len('\n---\n')]
                finally:
                    # delete=False keeps the file around for the hook, so it
                    # has to be removed explicitly, also when the hook fails.
                    patch.close()
                    os.unlink(patch.name)

            # We can skip non-merge commits that did not change any files. Those are typically
            # the result of file filtering, although they could also have been introduced
            # intentionally upstream, in which case we drop some information here.
            if len(parents) == 1:
                parent_rev = import_rev(parents[0])
                old_tree = runcmd("git show -s --pretty=format:%T " + parent_rev, **wargs).strip()
                commit = old_tree != new_tree
                if not commit:
                    new_rev = parent_rev
            else:
                commit = True
            if commit:
                new_rev = runcmd("git commit-tree".split() + add_p([import_rev(p) for p in parents]) +
                                 ["-m", body, new_tree],
                                 env=env).strip()
            old2new_revs[rev] = new_rev

            return new_rev

        if revs:
            for rev in revs:
                import_rev(rev)
            # Remember how to update our current head. New components get added,
            # updated components get the delta between current head and the updated component
            # applied.
            additional_heads[old2new_revs[revs[-1]]] = head if repo['last_revision'] else None
            repo['last_revision'] = revs[-1]

    # Now construct the final merge commit. We create the tree by
    # starting with the head and applying the changes from each
    # component's imported head revision.
    if additional_heads:
        runcmd("git reset --hard", **wargs)
        for rev, base in additional_heads.iteritems():
            apply_commit(base, rev, wargs, wargs, None)

        # Commit with all component branches as parents as well as the previous head.
        logger.info("Writing final merge commit...")
        msg = conf_commit_msg(conf, components)
        new_tree = runcmd("git write-tree", **wargs).strip()
        new_rev = runcmd("git commit-tree".split() +
                         add_p([head] + additional_heads.keys()) +
                         ["-m", msg, new_tree],
                         **wargs).strip()
        # And done! This is the first time we change the HEAD in the actual work tree.
        runcmd("git reset --hard %s" % new_rev)

        # Update and stage the (potentially modified)
        # combo-layer.conf, but do not commit separately.
        for name in repos:
            repo = conf.repos[name]
            rev = repo['last_revision']
            conf.update(name, "last_revision", rev)
        if commit_conf_file(conf, components, False):
            # Must augment the previous commit.
            runcmd("git commit --amend -C HEAD")
1206 | |||
# Module-level flag: set to True by import_rev() inside update_with_history()
# once find_revs() has analyzed the log, so that the log analysis is
# triggered at most once per run.
scanned_revs = False
def find_revs(old2new, head):
    '''Extend old2new (original commit hash -> commit hash in the
    combined repo) with the mappings recorded in the commit messages
    reachable from head. Relies on the "From ... rev: ..." convention.
    Entries already present in old2new are left untouched.'''
    logger.info("Analyzing log messages to find previously imported commits...")
    known_before = len(old2new)
    rev_pattern = re.compile(r'From .* rev: ([a-fA-F0-9]+)')
    # Each log entry is emitted as "<hash> NUL <message> NUL", so the
    # NUL-separated fields alternate between hash and message.
    fields = runcmd("git log --grep='From .* rev: [a-fA-F0-9][a-fA-F0-9]*' --pretty=format:%H%x00%B%x00 " + head).split(chr(0))
    for combined_rev, message in zip(fields[0::2], fields[1::2]):
        # Use the last one, in the unlikely case there are more than one.
        original_rev = rev_pattern.findall(message)[-1]
        old2new.setdefault(original_rev, combined_rev.strip())
    logger.info("Found %d additional commits, leading to: %s" % (len(old2new) - known_before, old2new))
1222 | |||
1223 | |||
def apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=None):
    '''Compare revision against parent, remove files deleted in the
    commit, re-write new or modified ones. Moves them into dest_dir.
    Optionally filters files.

    Parameters:
    parent      -- revision to diff against; if empty/None, all files of
                   rev are copied instead of applying a delta.
    rev         -- revision whose content gets applied.
    largs       -- keyword arguments for runcmd() selecting the repository
                   that rev is read from (its "destdir" entry is logged).
    wargs       -- keyword arguments for runcmd() selecting the work tree
                   and index that get modified; wargs["destdir"] is the
                   directory the files are extracted into.
    dest_dir    -- sub-directory of the work tree that receives the files;
                   empty/None means the top-level directory.
    file_filter -- optional callable that removes unwanted entries from
                   a list of file names in place.

    Exits the process via sys.exit(1) on an unknown diff status.
    '''
    if not dest_dir:
        dest_dir = "."
    # -r recurses into sub-directories, given is the full overview of
    # what changed.  We do not care about copy/edits or renames, so we
    # can disable those with --no-renames (but we still parse them,
    # because it was not clear from git documentation whether C and M
    # lines can still occur).
    logger.debug("Applying changes between %s and %s in %s" % (parent, rev, largs["destdir"]))
    delete = []
    update = []
    if parent:
        # Apply delta. The NUL-separated output alternates between status
        # and file name; pairing them up also drops the empty field that
        # follows the final NUL.
        changes = runcmd("git diff-tree --no-commit-id --no-renames --name-status -r --raw -z %s %s" % (parent, rev), **largs).split(chr(0))
        for status, name in zip(*[iter(changes)]*2):
            if status[0] in "ACMRT":
                update.append(name)
            elif status[0] == "D":
                delete.append(name)
            else:
                logger.error("Unknown status %s of file %s in revision %s" % (status, name, rev))
                sys.exit(1)
    else:
        # Copy all files. Every name is terminated by NUL, so splitting
        # leaves an empty trailing entry which must not be treated as a
        # file name (it would turn into an empty pathspec for "git archive"
        # and into the whole dest_dir for "git add").
        listing = runcmd("git ls-tree -r --name-only -z %s" % rev, **largs)
        update.extend(name for name in listing.split(chr(0)) if name)

    # Include/exclude files as defined in the component config.
    # Both updated and deleted file lists get filtered, because it might happen
    # that a file gets excluded, pulled from a different component, and then the
    # excluded file gets deleted. In that case we must keep the copy.
    if file_filter:
        file_filter(update)
        file_filter(delete)

    # We export into a tar archive here and extract with tar because it is simple (no
    # need to implement file and symlink writing ourselves) and gives us some degree
    # of parallel IO. The downside is that we have to pass the list of files via
    # command line parameters - hopefully there will never be too many at once.
    if update:
        target = os.path.join(wargs["destdir"], dest_dir)
        if not os.path.isdir(target):
            os.makedirs(target)
        runcmd("git archive %s %s | tar -C %s -xf -" % (rev, ' '.join([pipes.quote(x) for x in update]), pipes.quote(target)), **largs)
        runcmd("git add -f".split() + [os.path.join(dest_dir, x) for x in update], **wargs)
    if delete:
        runcmd("git rm -f --ignore-unmatch".split() + [os.path.join(dest_dir, x) for x in delete], **wargs)
1277 | |||
891 | def action_error(conf, args): | 1278 | def action_error(conf, args): |
892 | logger.info("invalid action %s" % args[0]) | 1279 | logger.info("invalid action %s" % args[0]) |
893 | 1280 | ||