summaryrefslogtreecommitdiffstats
path: root/scripts/combo-layer
diff options
context:
space:
mode:
authorPatrick Ohly <patrick.ohly@intel.com>2016-05-02 15:27:28 +0200
committerRichard Purdie <richard.purdie@linuxfoundation.org>2016-05-11 10:33:41 +0100
commitd53ed05390786b6a2f378195c0588b15cc336389 (patch)
tree469c3a183eb0528ac75bf5b4f8b192bf261bcbea /scripts/combo-layer
parent107c35e87ed08c99abad009bb1b9f90e587be56b (diff)
downloadpoky-d53ed05390786b6a2f378195c0588b15cc336389.tar.gz
combo-layer: implement "update with history"
The core idea is that all commits get imported, including merge commits, and joined into one big merge commit that imports the changes from the individual components into the main branch of the combined repository. This is done by copying the files in each commit and removing deleted ones, instead of trying to patch the combined repository. The advantages of doing updates in this mode are: - works for arbitrary upstream repos, not just those which support conversion into a linear set of patches - listing history shows that commits were developed independently in the different components, instead of artificially showing them as if they had been developed one after the other (component "aaaa" before "bbbb", then "ccc", ...) - bisecting becomes easier: when upstream repos only ensure consistency when merging into their "master" branches, then those merge commits are good candidates for test builds also in the combined repo - more data mining can be done, for example showing who merged a commit and when Selecting a subset of the files is supported, albeit with slightly different semantics for wild card matching compared to other code paths (/ is matched by * and ?). Empty commits get skipped because typically they are a result of filtering (but that is not checked, so intentionally empty commits also get skipped). Other code paths are intentionally left unchanged, to avoid regressions. However, the downside is that some opportunities for refactoring (in particular regarding file filtering) were ignored. (From OE-Core rev: 660f76b6fb0fb95738a2c8f50e0a99ffa5831c64) Signed-off-by: Patrick Ohly <patrick.ohly@intel.com> Signed-off-by: Ross Burton <ross.burton@intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts/combo-layer')
-rwxr-xr-xscripts/combo-layer391
1 files changed, 389 insertions, 2 deletions
diff --git a/scripts/combo-layer b/scripts/combo-layer
index 9297d5973d..92525cac14 100755
--- a/scripts/combo-layer
+++ b/scripts/combo-layer
@@ -28,6 +28,9 @@ import subprocess
28import tempfile 28import tempfile
29import ConfigParser 29import ConfigParser
30import re 30import re
31import copy
32import pipes
33import shutil
31from collections import OrderedDict 34from collections import OrderedDict
32from string import Template 35from string import Template
33 36
@@ -653,8 +656,7 @@ def action_update(conf, args):
653 action_pull(conf, ['arg0'] + components) 656 action_pull(conf, ['arg0'] + components)
654 657
655 if history: 658 if history:
656 logger.error("update with history not implemented yet") 659 update_with_history(conf, components, revisions, repos)
657 sys.exit(1)
658 else: 660 else:
659 update_with_patches(conf, components, revisions, repos) 661 update_with_patches(conf, components, revisions, repos)
660 662
@@ -888,6 +890,391 @@ def action_splitpatch(conf, args):
888 else: 890 else:
889 logger.info(patch_filename) 891 logger.info(patch_filename)
890 892
def update_with_history(conf, components, revisions, repos):
    '''Update all components with full history.

    Works by importing all commits reachable from a component's
    current head revision. If those commits are rooted in an already
    imported commit, their content gets mixed with the content of the
    combined repo of that commit (new or modified files overwritten,
    removed files removed).

    The last commit is an artificial merge commit that merges all the
    updated components into the combined repository.

    The HEAD ref only gets updated at the very end. All intermediate work
    happens in a worktree which will get garbage collected by git eventually
    after a failure.

    Parameters:
    conf       -- configuration object; conf.repos[name] is read like a
                  dict ('last_revision', 'local_repo_dir', 'dest_dir' and
                  optionally 'branch', 'hook', 'file_filter',
                  'file_exclude'), and conf.update() is called to record
                  the new 'last_revision' of each component.
    components -- passed through unchanged to conf_commit_msg() and
                  commit_conf_file().
    revisions  -- mapping from component name to the revision to import;
                  components without an entry are imported up to their
                  configured branch (default "master").
    repos      -- iterable of the component names to update.

    Exits the process via sys.exit(1) when check_rev_branch() rejects a
    revision.
    '''
    # Remember current HEAD and what we need to add to it.
    head = runcmd("git rev-parse HEAD").strip()
    additional_heads = {}

    # Track the mapping between original commit and commit in the
    # combined repo. We do not have to distinguish between components,
    # because commit hashes are different anyway. Often we can
    # skip find_revs() entirely (for example, when all new commits
    # are derived from the last imported revision).
    #
    # Using "head" (typically the merge commit) instead of the actual
    # commit for the component leads to a nicer history in the combined
    # repo.
    old2new_revs = {}
    for name in repos:
        repo = conf.repos[name]
        revision = repo['last_revision']
        if revision:
            old2new_revs[revision] = head

    def add_p(parents):
        '''Insert -p before each entry.'''
        parameters = []
        for p in parents:
            parameters.append('-p')
            parameters.append(p)
        return parameters

    # Do all intermediate work with a separate work dir and index,
    # chosen via env variables (can't use "git worktree", it is too
    # new). This is useful (no changes to current work tree unless the
    # update succeeds) and required (otherwise we end up temporarily
    # removing the combo-layer hooks that we currently use when
    # importing a new component).
    #
    # Not cleaned up after a failure at the moment.
    wdir = os.path.join(os.getcwd(), ".git", "combo-layer")
    windex = wdir + ".index"
    if os.path.isdir(wdir):
        shutil.rmtree(wdir)
    os.mkdir(wdir)
    # Take a plain dict snapshot of the environment. A deep copy of
    # os.environ can yield another environ-like object, and assigning to
    # such an object may write through to the real process environment.
    wenv = os.environ.copy()
    wenv["GIT_WORK_TREE"] = wdir
    wenv["GIT_INDEX_FILE"] = windex
    # This one turned out to be needed in practice.
    wenv["GIT_OBJECT_DIRECTORY"] = os.path.join(os.getcwd(), ".git", "objects")
    wargs = {"destdir": wdir, "env": wenv}

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        hook = repo.get('hook', None)
        largs = {"destdir": ldir, "env": None}
        file_include = repo.get('file_filter', '').split()
        file_include.sort() # make sure that short entries like '.' come first.
        file_exclude = repo.get('file_exclude', '').split()

        def include_file(path):
            '''Return True if path is selected by the component's file_filter.'''
            if not file_include:
                # No explicit filter set, include file.
                return True
            for pattern in file_include:
                if pattern == '.':
                    # Another special case: include current directory and thus all files.
                    return True
                if path.startswith(pattern):
                    # Included in directory or direct file match. This is a
                    # plain string prefix comparison, not a path-aware one.
                    return True
                # Check for wildcard match *with* allowing * to match /, i.e.
                # src/*.c does match src/foobar/*.c. That's not how it is done elsewhere
                # when passing the filtering to "git archive", but it is unclear what
                # the intended semantic is (the comment on file_exclude that "append a * wildcard
                # at the end" to match the full content of a directories implies that
                # slashes are indeed not special), so here we simply do what's easy to
                # implement in Python.
                logger.debug('fnmatch(%s, %s)' % (path, pattern))
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def exclude_file(path):
            '''Return True if path matches one of the file_exclude patterns.'''
            for pattern in file_exclude:
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def file_filter(files):
            '''Clean up file list so that only included files remain.

            The list is modified in place because apply_commit() relies
            on that and ignores the return value.
            '''
            files[:] = [path for path in files
                        if include_file(path) and not exclude_file(path)]

        # Generate the revision list.
        logger.info("Analyzing commits from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)

        last_revision = repo['last_revision']
        rev_list_args = "--full-history --sparse --topo-order --reverse"
        if not last_revision:
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            rev_list_args = rev_list_args + ' ' + top_revision
        else:
            if not check_rev_branch(name, ldir, last_revision, branch):
                sys.exit(1)
            rev_list_args = "%s %s..%s" % (rev_list_args, last_revision, top_revision)

            # By definition, the current HEAD contains the latest imported
            # commit of each component. We use that as initial mapping even
            # though the commits do not match exactly because
            # a) it always works (in contrast to find_revs, which relies on special
            #    commit messages)
            # b) it is faster than find_revs, which will only be called on demand
            #    and can be skipped entirely in most cases
            # c) last but not least, the combined history looks nicer when all
            #    new commits are rooted in the same merge commit
            #
            # Only recorded when there is a last revision, so that no
            # empty/None key ends up in the mapping.
            old2new_revs[last_revision] = head

        # We care about all commits (--full-history and --sparse) and
        # we want to reconstruct the topology and thus do not care
        # about ordering by time (--topo-order). We ask for the ones
        # we need to import first to be listed first (--reverse).
        revs = runcmd("git rev-list %s" % rev_list_args, **largs).split()
        logger.debug("To be imported: %s" % revs)
        # Now 'revs' contains all revisions reachable from the top revision.
        # All revisions derived from the 'last_revision' definitely are new,
        # whereas the others may or may not have been imported before. For
        # a linear history in the component, that second set will be empty.
        # To distinguish between them, we also get the shorter list
        # of revisions starting at the ancestor.
        if last_revision:
            ancestor_revs = runcmd("git rev-list --ancestry-path %s" % rev_list_args, **largs).split()
        else:
            ancestor_revs = []
        logger.debug("Ancestors: %s" % ancestor_revs)

        # import_rev() tests membership for every revision it visits; use
        # sets so that each test does not scan the complete list.
        pending_revs = set(revs)
        ancestor_set = set(ancestor_revs)

        # Now import each revision.
        logger.info("Importing commits from %s..." % name)
        def import_rev(rev):
            '''Import rev (and, recursively, its parents) into the combined
            repo and return the hash of the corresponding commit there.'''
            global scanned_revs

            # If it is part of the new commits, we definitely need
            # to import it. Otherwise we need to check, we might have
            # imported it before. If it was imported and we merely
            # fail to find it because commit messages did not track
            # the mapping, then we end up importing it again. So
            # combined repos using "updating with history" really should
            # enable the "From ... rev:" commit header modifications.
            if rev not in ancestor_set and rev not in old2new_revs and not scanned_revs:
                logger.debug("Revision %s triggers log analysis." % rev)
                find_revs(old2new_revs, head)
                scanned_revs = True
            new_rev = old2new_revs.get(rev, None)
            if new_rev:
                return new_rev

            # If the commit is not in the original list of revisions
            # to be imported, then it must be a parent of one of those
            # commits and it was skipped during earlier imports or not
            # found. Importing such merge commits leads to very ugly
            # history (long cascade of merge commits which all point
            # to older commits) when switching from "update via
            # patches" to "update with history".
            #
            # We can avoid importing merge commits if all non-merge commits
            # reachable from it were already imported. In that case we
            # can root the new commits in the current head revision.
            def is_imported(prev):
                parents = runcmd("git show --no-patch --pretty=format:%P " + prev, **largs).split()
                if len(parents) > 1:
                    for p in parents:
                        if not is_imported(p):
                            logger.debug("Must import %s because %s is not imported." % (rev, p))
                            return False
                    return True
                elif prev in old2new_revs:
                    return True
                else:
                    logger.debug("Must import %s because %s is not imported." % (rev, prev))
                    return False
            if rev not in pending_revs and is_imported(rev):
                old2new_revs[rev] = head
                return head

            # Need to import rev. Collect some information about it.
            logger.debug("Importing %s" % rev)
            (parents, author_name, author_email, author_timestamp, body) = \
                runcmd("git show --no-patch --pretty=format:%P%x00%an%x00%ae%x00%at%x00%B " + rev, **largs).split(chr(0))
            parents = parents.split()
            if parents:
                # Arbitrarily pick the first parent as base. It may or may not have
                # been imported before. For example, if the parent is a merge commit
                # and previously the combined repository used patching as update
                # method, then the actual merge commit parent never was imported.
                # To cover this, we recursively import parents.
                parent = parents[0]
                new_parent = import_rev(parent)
                # Clean index and working tree. TODO: can we combine this and the
                # next into one command with less file IO?
                # "git reset --hard" does not work, it changes HEAD of the parent
                # repo, which we wanted to avoid. Probably need to keep
                # track of the rev that corresponds to the index and use apply_commit().
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)
                # Update index and working tree to match the parent.
                runcmd("git checkout -q -f %s ." % new_parent, **wargs)
            else:
                parent = None
                # Clean index and working tree.
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)

            # Modify index and working tree such that it mirrors the commit.
            apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=file_filter)

            # Now commit.
            new_tree = runcmd("git write-tree", **wargs).strip()
            env = dict(wenv)
            env['GIT_AUTHOR_NAME'] = author_name
            env['GIT_AUTHOR_EMAIL'] = author_email
            env['GIT_AUTHOR_DATE'] = author_timestamp
            if hook:
                # Need to turn the verbatim commit message into something resembling a patch header
                # for the hook.
                patch = tempfile.NamedTemporaryFile(delete=False)
                try:
                    with patch:
                        patch.write('Subject: [PATCH] ')
                        patch.write(body)
                        patch.write('\n---\n')
                    # The file is closed at this point, so the hook sees the
                    # complete content and may rewrite the file in place.
                    runcmd([hook, patch.name, rev, name])
                    with open(patch.name) as f:
                        body = f.read()[len('Subject: [PATCH] '):][:-len('\n---\n')]
                finally:
                    # delete=False means nobody else removes the file.
                    os.unlink(patch.name)

            # We can skip non-merge commits that did not change any files. Those are typically
            # the result of file filtering, although they could also have been introduced
            # intentionally upstream, in which case we drop some information here.
            if len(parents) == 1:
                parent_rev = import_rev(parents[0])
                old_tree = runcmd("git show -s --pretty=format:%T " + parent_rev, **wargs).strip()
                commit = old_tree != new_tree
                if not commit:
                    new_rev = parent_rev
            else:
                commit = True
            if commit:
                new_rev = runcmd("git commit-tree".split() + add_p([import_rev(p) for p in parents]) +
                                 ["-m", body, new_tree],
                                 env=env).strip()
            old2new_revs[rev] = new_rev

            return new_rev

        if revs:
            for rev in revs:
                import_rev(rev)
            # Remember how to update our current head. New components get added,
            # updated components get the delta between current head and the updated component
            # applied.
            additional_heads[old2new_revs[revs[-1]]] = head if repo['last_revision'] else None
            repo['last_revision'] = revs[-1]

    # Now construct the final merge commit. We create the tree by
    # starting with the head and applying the changes from each
    # components imported head revision.
    if additional_heads:
        runcmd("git reset --hard", **wargs)
        for rev, base in additional_heads.items():
            apply_commit(base, rev, wargs, wargs, None)

        # Commit with all component branches as parents as well as the previous head.
        logger.info("Writing final merge commit...")
        msg = conf_commit_msg(conf, components)
        new_tree = runcmd("git write-tree", **wargs).strip()
        new_rev = runcmd("git commit-tree".split() +
                         add_p([head] + list(additional_heads)) +
                         ["-m", msg, new_tree],
                         **wargs).strip()
        # And done! This is the first time we change the HEAD in the actual work tree.
        runcmd("git reset --hard %s" % new_rev)

        # Update and stage the (potentially modified)
        # combo-layer.conf, but do not commit separately.
        for name in repos:
            repo = conf.repos[name]
            rev = repo['last_revision']
            conf.update(name, "last_revision", rev)
        if commit_conf_file(conf, components, False):
            # Must augment the previous commit.
            runcmd("git commit --amend -C HEAD")

# Set to True by update_with_history() once find_revs() has scanned the
# log, so that the scan happens at most once per run.
scanned_revs = False
def find_revs(old2new, head):
    '''Construct mapping from original commit hash to commit hash in
    combined repo by looking at the commit messages. Depends on the
    "From ... rev: ..." convention.

    old2new -- dict that gets extended in place; entries which are
               already present are never overwritten.
    head    -- revision in the combined repo whose history is searched.
    '''
    logger.info("Analyzing log messages to find previously imported commits...")
    num_known = len(old2new)
    # Each commit is printed as "<hash>NUL<body>NUL", so splitting at NUL
    # yields alternating hash and body entries (plus a trailing remainder).
    log = runcmd("git log --grep='From .* rev: [a-fA-F0-9][a-fA-F0-9]*' --pretty=format:%H%x00%B%x00 " + head).split(chr(0))
    regex = re.compile(r'From .* rev: ([a-fA-F0-9]+)')
    # zip() over the same iterator twice walks the list in (hash, body)
    # pairs and drops the unpaired remainder at the end.
    for new_rev, body in zip(*[iter(log)]* 2):
        matches = regex.findall(body)
        if not matches:
            # git selected this commit, but the body handed to us does not
            # contain the marker. Skip it instead of failing with an
            # IndexError.
            continue
        # Use the last one, in the unlikely case there are more than one.
        rev = matches[-1]
        if rev not in old2new:
            # Entries after the first start with the newline that
            # separates commits in the log output, hence the strip().
            old2new[rev] = new_rev.strip()
    logger.info("Found %d additional commits, leading to: %s" % (len(old2new) - num_known, old2new))

1223
def apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=None):
    '''Compare revision against parent, remove files deleted in the
    commit, re-write new or modified ones. Moves them into dest_dir.
    Optionally filters files.

    parent      -- revision to diff against; when empty or None, all
                   files of rev are copied.
    rev         -- revision whose content gets applied.
    largs       -- keyword arguments for runcmd() when reading from the
                   source repository; "destdir" is also used for logging.
    wargs       -- keyword arguments for runcmd() when modifying the
                   target index; wargs["destdir"] is the directory that
                   files get extracted into.
    dest_dir    -- sub-directory below wargs["destdir"] for the files;
                   empty or None means the top-level directory.
    file_filter -- optional callable which gets a list of file names and
                   removes unwanted entries from it in place.

    Exits the process via sys.exit(1) when git reports a change status
    that is not handled here.
    '''
    if not dest_dir:
        dest_dir = "."
    # -r recurses into sub-directories, given is the full overview of
    # what changed. We do not care about copy/edits or renames, so we
    # can disable those with --no-renames (but we still parse them,
    # because it was not clear from git documentation whether C and M
    # lines can still occur).
    logger.debug("Applying changes between %s and %s in %s" % (parent, rev, largs["destdir"]))
    delete = []
    update = []
    if parent:
        # Apply delta.
        changes = runcmd("git diff-tree --no-commit-id --no-renames --name-status -r --raw -z %s %s" % (parent, rev), **largs).split(chr(0))
        # The output alternates between status and file name; pairing
        # them up also drops the empty remainder after the last NUL.
        for status, name in zip(*[iter(changes)]*2):
            if status[0] in "ACMRT":
                update.append(name)
            elif status[0] in "D":
                delete.append(name)
            else:
                logger.error("Unknown status %s of file %s in revision %s" % (status, name, rev))
                sys.exit(1)
    else:
        # Copy all files. Every name is followed by a NUL, so splitting
        # leaves an empty string at the end; it must not be passed on
        # as an (empty) path to "git archive" and "git add".
        listing = runcmd("git ls-tree -r --name-only -z %s" % rev, **largs)
        update.extend(name for name in listing.split(chr(0)) if name)

    # Include/exclude files as defined in the component config.
    # Both updated and deleted file lists get filtered, because it might happen
    # that a file gets excluded, pulled from a different component, and then the
    # excluded file gets deleted. In that case we must keep the copy.
    if file_filter:
        file_filter(update)
        file_filter(delete)

    # We export into a tar archive here and extract with tar because it is simple (no
    # need to implement file and symlink writing ourselves) and gives us some degree
    # of parallel IO. The downside is that we have to pass the list of files via
    # command line parameters - hopefully there will never be too many at once.
    if update:
        target = os.path.join(wargs["destdir"], dest_dir)
        if not os.path.isdir(target):
            os.makedirs(target)
        runcmd("git archive %s %s | tar -C %s -xf -" % (rev, ' '.join([pipes.quote(x) for x in update]), pipes.quote(target)), **largs)
        runcmd("git add -f".split() + [os.path.join(dest_dir, x) for x in update], **wargs)
    if delete:
        runcmd("git rm -f --ignore-unmatch".split() + [os.path.join(dest_dir, x) for x in delete], **wargs)

891def action_error(conf, args): 1278def action_error(conf, args):
892 logger.info("invalid action %s" % args[0]) 1279 logger.info("invalid action %s" % args[0])
893 1280