diff options
author | Christopher Larson <kergoth@gmail.com> | 2017-05-13 02:46:27 +0500 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2017-06-02 13:36:57 +0100 |
commit | 2a60c406372d400437ecaa8712e6dc80b3d9fcec (patch) | |
tree | bdbd2d112e04d36a475c98735ed4cb7c054b702b | |
parent | ab4e578b86efcf533c43dfa76e97ea98cd9a5808 (diff) | |
download | poky-2a60c406372d400437ecaa8712e6dc80b3d9fcec.tar.gz |
bitbake: git-make-shallow: add script to make a git repo shallow
This script will be used by the git fetcher to create shallow mirror tarballs.
usage: git-make-shallow [-h] [--ref REF] [--shrink] REVISION [REVISION ...]
Remove the history of the specified revisions, then optionally filter the
available refs to those specified.
positional arguments:
REVISION a git revision/commit
optional arguments:
-h, --help show this help message and exit
--ref REF, -r REF remove all but the specified refs (cumulative)
--shrink, -s shrink the git repository by repacking and pruning
While git does provide the ability to clone at a specific depth, and to fetch
all remote refs at a particular depth, that depth applies uniformly across all
branches and tags, which doesn't provide the flexibility we need, hence this script.
Refs (branches+tags) can be filtered, as the cost of history removal grows
rapidly with the number of refs. Even the existing `git fetch --depth=` is
extremely slow on an upstream kernel repository with all the branches and tags
kept.
This uses the same underlying mechanism to implement the history removal which
git itself uses (.git/shallow), and the results, when configured similarly, are
in line with the results git itself produces with `fetch --depth`.
(Bitbake rev: 0254020f0e1911c0eaf99111b91828d2a74a4ee1)
Signed-off-by: Christopher Larson <chris_larson@mentor.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rwxr-xr-x | bitbake/bin/git-make-shallow | 165 | ||||
-rw-r--r-- | bitbake/lib/bb/tests/fetch.py | 127 |
2 files changed, 292 insertions, 0 deletions
diff --git a/bitbake/bin/git-make-shallow b/bitbake/bin/git-make-shallow new file mode 100755 index 0000000000..296d3a3dbd --- /dev/null +++ b/bitbake/bin/git-make-shallow | |||
@@ -0,0 +1,165 @@ | |||
1 | #!/usr/bin/env python3 | ||
2 | """git-make-shallow: make the current git repository shallow | ||
3 | |||
4 | Remove the history of the specified revisions, then optionally filter the | ||
5 | available refs to those specified. | ||
6 | """ | ||
7 | |||
8 | import argparse | ||
9 | import collections | ||
10 | import errno | ||
11 | import itertools | ||
12 | import os | ||
13 | import subprocess | ||
14 | import sys | ||
15 | |||
# Version number of this script; nothing else in this file reads it.
version = 1.0
17 | |||
18 | |||
def main():
    """Make the current git repository shallow at the requested revisions.

    Steps, in order:
      1. Undo any existing shallow state (``git fetch --unshallow``; if that
         fails, the shallow file is simply deleted).
      2. Parse the command line and record the history of the revisions.
      3. Write the new shallow points.
      4. Verify that every recorded commit still reachable from the kept
         refs has no parents left, exiting with an error otherwise.
      5. Delete the refs that were not asked for, and optionally shrink the
         repository.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')

    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            unshallowed = False
        else:
            unshallowed = True

        if not unshallowed:
            # Could not fetch the full history; just drop the shallow file.
            try:
                os.unlink(shallow_file)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    args = process_args()

    # History of the requested revisions as it looks *before* truncation.
    rev_list = ['git', 'rev-list']
    revs = check_output(rev_list + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    # Any pre-truncation commit still reachable from the refs must now be
    # parentless, i.e. `rev^@` must expand to nothing.
    ref_revs = check_output(rev_list + args.refs).splitlines()
    for commit in set(revs) & set(ref_revs):
        parents = check_output(['git', 'rev-parse', '{}^@'.format(commit)])
        if parents:
            sys.exit('Error: %s was not made shallow' % commit)

    filter_refs(args.refs)

    if not args.shrink:
        return
    shrink_repo(git_dir)
    subprocess.check_call(['git', 'fsck', '--unreachable'])
51 | |||
52 | |||
def process_args():
    """Parse the command line and normalise refs and revisions via git.

    Returns the argparse namespace with:
      * ``refs``: full symbolic ref names. When no ``--ref`` was given, every
        ref whose object (or peeled object) is a commit is used. Refs ending
        in ``/HEAD`` are always dropped.
      * ``revisions``: the output lines of ``git rev-parse`` for each
        revision peeled with ``^{}``.
      * ``shrink``: the ``--shrink`` flag.

    Prints the help text and exits with status 2 when run with no arguments.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(
        description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
    parser.add_argument(
        '--ref', '-r', metavar='REF', action='append', dest='refs',
        help='remove all but the specified refs (cumulative)')
    parser.add_argument(
        '--shrink', '-s', action='store_true',
        help='shrink the git repository by repacking and pruning')
    parser.add_argument(
        'revisions', metavar='REVISION', nargs='+',
        help='a git revision/commit')

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    def points_at_commit(refname, objecttype, peeled_objecttype):
        # Keep refs that are commits, or tags that peel to a commit.
        return 'commit' in (objecttype, peeled_objecttype)

    if args.refs:
        candidates = check_output(
            ['git', 'rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
    else:
        candidates = get_all_refs(points_at_commit)
    args.refs = [name for name in candidates if not name.endswith('/HEAD')]

    peeled = ['%s^{}' % rev for rev in args.revisions]
    args.revisions = check_output(['git', 'rev-parse'] + peeled).splitlines()
    return args
74 | |||
75 | |||
def check_output(cmd, input=None):
    """Run ``cmd`` and return what it wrote to stdout, as text.

    ``input``, when given, is passed to the command on stdin. This is a thin
    wrapper around ``subprocess.check_output`` with text mode enabled, so a
    non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    options = {'universal_newlines': True, 'input': input}
    return subprocess.check_output(cmd, **options)
78 | |||
79 | |||
def make_shallow(shallow_file, revisions, refs):
    """Append every commit produced by follow_history_intersections() to
    ``shallow_file``, one per line, announcing each on stdout."""
    for commit in follow_history_intersections(revisions, refs):
        print("Processing %s" % commit)
        # Reopen (and therefore close) the file for every commit so the entry
        # is on disk before the generator runs its next git command.
        with open(shallow_file, 'a') as shallow:
            print(commit, file=shallow)
86 | |||
87 | |||
def get_all_refs(ref_filter=None):
    """List the names of the refs in this repository.

    ``ref_filter``, if given, is called as
    ``ref_filter(refname, objecttype, peeled_objecttype)`` for every ref and
    only refs for which it returns a true value are kept.
    """
    fmt = '--format=%(refname)\t%(objecttype)\t%(*objecttype)'
    listing = check_output(['git', 'for-each-ref', fmt])

    entries = []
    for line in listing.splitlines():
        fields = line.rsplit('\t')
        # Normalise every record to exactly three fields, padding with None.
        entries.append(tuple((fields + [None] * 3)[:3]))

    if ref_filter:
        entries = [entry for entry in entries if ref_filter(*entry)]
    return [entry[0] for entry in entries]
96 | |||
97 | |||
def iter_extend(iterable, length, obj=None):
    """Return an iterator of exactly ``length`` items drawn from ``iterable``.

    Short input is padded with ``obj``; long input is cut off.
    """
    padding = itertools.repeat(obj)
    padded = itertools.chain(iterable, padding)
    return itertools.islice(padded, length)
101 | |||
102 | |||
def filter_refs(refs):
    """Delete every ref in the repository that is not listed in ``refs``."""
    unwanted = set(get_all_refs()) - set(refs)
    if not unwanted:
        return

    # xargs reads NUL-terminated names and runs one `git update-ref -d` per ref.
    payload = '\0'.join(unwanted) + '\0'
    delete_cmd = ['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref']
    check_output(delete_cmd, input=payload)
110 | |||
111 | |||
def follow_history_intersections(revisions, refs):
    """Yield the commits at which history has to be cut.

    Starts from ``revisions`` and works through a queue: each commit not seen
    before is yielded, and for each of its parents the merge base with every
    independent ref tip is queued for the same treatment.
    """
    pending = collections.deque(revisions)
    visited = set()

    while pending:
        commit = pending.popleft()
        if commit in visited:
            continue

        # Read the parents *before* yielding: the consumer may record this
        # commit as shallow while we are suspended.
        parent_ids = check_output(['git', 'rev-parse', '%s^@' % commit]).splitlines()

        yield commit
        visited.add(commit)

        if parent_ids:
            ref_tips = check_output(
                ['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()
            for parent in parent_ids:
                for tip in ref_tips:
                    print("Checking %s vs %s" % (parent, tip))
                    try:
                        base = check_output(['git', 'merge-base', parent, tip]).rstrip()
                    except subprocess.CalledProcessError:
                        # git reported failure for this pair; nothing to queue.
                        continue
                    pending.append(base)
139 | |||
140 | |||
def iter_except(func, exception, start=None):
    """Yield the results of calling ``func`` over and over until it raises
    ``exception``, which ends the iteration silently.

    If ``start`` is given, it is called once and its result is yielded first.
    """
    produce = func if start is None else start
    try:
        while True:
            yield produce()
            produce = func
    except exception:
        return
150 | |||
151 | |||
def shrink_repo(git_dir):
    """Repack and prune the repository at ``git_dir`` to drop unreachable objects.

    Expires unreachable reflog entries, repacks everything into a single
    pack, removes ``objects/info/alternates`` if it exists, then prunes.
    """
    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')

    subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(['git', 'repack', '-ad'])

    try:
        os.unlink(alternates)
    except OSError as err:
        # A missing alternates file is fine; anything else is a real error.
        already_gone = err.errno == errno.ENOENT
        if not already_gone:
            raise

    subprocess.check_call(['git', 'prune', '--expire', 'now'])
162 | |||
163 | |||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
diff --git a/bitbake/lib/bb/tests/fetch.py b/bitbake/lib/bb/tests/fetch.py index 0fd2c02163..510071d25d 100644 --- a/bitbake/lib/bb/tests/fetch.py +++ b/bitbake/lib/bb/tests/fetch.py | |||
@@ -852,3 +852,130 @@ class FetchCheckStatusTest(FetcherTest): | |||
852 | self.assertTrue(ret, msg="URI %s, can't check status" % (u)) | 852 | self.assertTrue(ret, msg="URI %s, can't check status" % (u)) |
853 | 853 | ||
854 | connection_cache.close_connections() | 854 | connection_cache.close_connections() |
855 | |||
856 | |||
class GitMakeShallowTest(FetcherTest):
    """Run bin/git-make-shallow against small scratch git repositories."""

    bitbake_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..')
    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')

    def setUp(self):
        """Create an empty git repository under the test's temp directory."""
        FetcherTest.setUp(self)
        self.gitdir = os.path.join(self.tempdir, 'gitshallow')
        bb.utils.mkdirhier(self.gitdir)
        bb.process.run('git init', cwd=self.gitdir)

    def assertRefs(self, expected_refs):
        """Assert that the repository contains exactly ``expected_refs``."""
        present = self.git(['for-each-ref', '--format=%(refname)']).splitlines()
        wanted = self.git(['rev-parse', '--symbolic-full-name'] + expected_refs).splitlines()
        self.assertEqual(sorted(wanted), sorted(present))

    def assertRevCount(self, expected_count, args=None):
        """Assert that ``git rev-list <args>`` (default: HEAD) lists ``expected_count`` lines."""
        rev_args = ['HEAD'] if args is None else args
        listing = self.git(['rev-list'] + rev_args)
        actual_count = len(listing.splitlines())
        self.assertEqual(expected_count, actual_count, msg='Object count `%d` is not the expected `%d`' % (actual_count, expected_count))

    def git(self, cmd):
        """Run a git command (string or argument list) in the scratch repo.

        Returns the first element of bb.process.run()'s result, which callers
        treat as the command's text output.
        """
        full_cmd = 'git ' + cmd if isinstance(cmd, str) else ['git'] + cmd
        result = bb.process.run(full_cmd, cwd=self.gitdir)
        return result[0]

    def make_shallow(self, args=None):
        """Run git-make-shallow in the scratch repo (default argument: HEAD)."""
        script_args = ['HEAD'] if args is None else args
        return bb.process.run([self.make_shallow_path] + script_args, cwd=self.gitdir)

    def add_empty_file(self, path, msg=None):
        """Create an empty file at ``path`` and commit it; ``msg`` defaults to ``path``."""
        message = path if msg is None else msg
        with open(os.path.join(self.gitdir, path), 'w'):
            pass
        self.git(['add', path])
        self.git(['commit', '-m', message, path])

    def test_make_shallow_single_branch_no_merge(self):
        # Two linear commits; cutting at HEAD leaves one.
        for name in ('a', 'b'):
            self.add_empty_file(name)
        self.assertRevCount(2)
        self.make_shallow()
        self.assertRevCount(1)

    def test_make_shallow_single_branch_one_merge(self):
        # Cut two commits below HEAD, underneath a merge of a side branch.
        for name in ('a', 'b'):
            self.add_empty_file(name)
        self.git('checkout -b a_branch')
        self.add_empty_file('c')
        self.git('checkout master')
        self.add_empty_file('d')
        self.git('merge --no-ff --no-edit a_branch')
        self.git('branch -d a_branch')
        self.add_empty_file('e')
        self.assertRevCount(6)
        self.make_shallow(['HEAD~2'])
        self.assertRevCount(5)

    def test_make_shallow_at_merge(self):
        # HEAD itself is the merge commit.
        self.add_empty_file('a')
        self.git('checkout -b a_branch')
        self.add_empty_file('b')
        self.git('checkout master')
        self.git('merge --no-ff --no-edit a_branch')
        self.git('branch -d a_branch')
        self.assertRevCount(3)
        self.make_shallow()
        self.assertRevCount(1)

    def test_make_shallow_annotated_tag(self):
        # The revision is given as an annotated tag rather than a commit.
        for name in ('a', 'b'):
            self.add_empty_file(name)
        self.git('tag -a -m a_tag a_tag')
        self.assertRevCount(2)
        self.make_shallow(['a_tag'])
        self.assertRevCount(1)

    def test_make_shallow_multi_ref(self):
        # Several refs (two branches and a lightweight tag) share history.
        for name in ('a', 'b'):
            self.add_empty_file(name)
        self.git('checkout -b a_branch')
        self.add_empty_file('c')
        self.git('checkout master')
        self.add_empty_file('d')
        self.git('checkout -b a_branch_2')
        self.add_empty_file('a_tag')
        self.git('tag a_tag')
        self.git('checkout master')
        self.git('branch -D a_branch_2')
        self.add_empty_file('e')
        self.assertRevCount(6, ['--all'])
        self.make_shallow()
        self.assertRevCount(5, ['--all'])

    def test_make_shallow_multi_ref_trim(self):
        # With -r only the named ref survives.
        self.add_empty_file('a')
        self.git('checkout -b a_branch')
        self.add_empty_file('c')
        self.git('checkout master')
        self.assertRevCount(1)
        self.assertRevCount(2, ['--all'])
        self.assertRefs(['master', 'a_branch'])
        self.make_shallow(['-r', 'master', 'HEAD'])
        self.assertRevCount(1, ['--all'])
        self.assertRefs(['master'])

    def test_make_shallow_noop(self):
        # A single root commit has no history to remove.
        self.add_empty_file('a')
        self.assertRevCount(1)
        self.make_shallow()
        self.assertRevCount(1)

    # The remaining test needs network access; it is only defined when
    # BB_SKIP_NETTESTS is not set to "yes".
    if os.environ.get("BB_SKIP_NETTESTS") == "yes":
        print("Unset BB_SKIP_NETTESTS to run network tests")
    else:
        def test_make_shallow_bitbake(self):
            self.git('remote add origin https://github.com/openembedded/bitbake')
            self.git('fetch --tags origin')
            revs_before = len(self.git('rev-list --all').splitlines())
            self.make_shallow(['refs/tags/1.10.0'])
            self.assertRevCount(revs_before - 1746, ['--all'])