summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christopher Larson <kergoth@gmail.com> 2017-05-13 02:46:27 +0500
committer Richard Purdie <richard.purdie@linuxfoundation.org> 2017-06-02 13:36:57 +0100
commit 2a60c406372d400437ecaa8712e6dc80b3d9fcec (patch)
tree bdbd2d112e04d36a475c98735ed4cb7c054b702b
parent ab4e578b86efcf533c43dfa76e97ea98cd9a5808 (diff)
download poky-2a60c406372d400437ecaa8712e6dc80b3d9fcec.tar.gz
bitbake: git-make-shallow: add script to make a git repo shallow
This script will be used by the git fetcher to create shallow mirror tarballs.

    usage: git-make-shallow [-h] [--ref REF] [--shrink] REVISION [REVISION ...]

    Remove the history of the specified revisions, then optionally filter the
    available refs to those specified.

    positional arguments:
      REVISION           a git revision/commit

    optional arguments:
      -h, --help         show this help message and exit
      --ref REF, -r REF  remove all but the specified refs (cumulative)
      --shrink, -s       shrink the git repository by repacking and pruning

While git does provide the ability to clone at a specific depth, and fetch all remote refs at a particular depth, the depth is across all branches/tags, and doesn't provide the flexibility we need, hence this script.

Refs (branches+tags) can be filtered, as the process of history removal scales up rapidly with the number of refs. Even the existing `git fetch --depth=` is extremely slow on an upstream kernel repository with all the branches and tags kept.

This uses the same underlying mechanism to implement the history removal which git itself uses (.git/shallow), and the results, when configured similarly, are in line with the results git itself produces with `fetch --depth`.

(Bitbake rev: 0254020f0e1911c0eaf99111b91828d2a74a4ee1)

Signed-off-by: Christopher Larson <chris_larson@mentor.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rwxr-xr-x bitbake/bin/git-make-shallow 165
-rw-r--r-- bitbake/lib/bb/tests/fetch.py 127
2 files changed, 292 insertions, 0 deletions
diff --git a/bitbake/bin/git-make-shallow b/bitbake/bin/git-make-shallow
new file mode 100755
index 0000000000..296d3a3dbd
--- /dev/null
+++ b/bitbake/bin/git-make-shallow
@@ -0,0 +1,165 @@
1#!/usr/bin/env python3
2"""git-make-shallow: make the current git repository shallow
3
4Remove the history of the specified revisions, then optionally filter the
5available refs to those specified.
6"""
7
8import argparse
9import collections
10import errno
11import itertools
12import os
13import subprocess
14import sys
15
16version = 1.0
17
18
def main():
    """Make the git repository in the current directory shallow.

    Steps, in order:

    1. Undo any existing shallow state (``git fetch --unshallow``, falling
       back to deleting the ``shallow`` file when that command fails).
    2. Parse the command line and record the full history of the requested
       revisions.
    3. Write the shallow boundary via ``make_shallow``.
    4. Verify that every commit from step 2 which is still reachable from the
       kept refs no longer reports any parents; exit with an error otherwise.
    5. Delete every ref that is not being kept and, if ``--shrink`` was
       given, repack/prune the repository and run ``git fsck``.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')

    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # Could not unshallow via fetch: just drop the shallow marker
            # file.  It having vanished in the meantime is not an error.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()

    # Snapshot the complete history of the requested revisions *before* the
    # shallow file is written, so it can be checked afterwards.
    history_before = check_output(['git', 'rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    visible_after = check_output(['git', 'rev-list'] + args.refs).splitlines()
    remaining_history = set(history_before) & set(visible_after)
    for commit in remaining_history:
        # ``<rev>^@`` prints the parents of the commit; any output means the
        # history behind this commit is still present.
        still_has_parents = check_output(['git', 'rev-parse', '%s^@' % commit])
        if still_has_parents:
            sys.exit('Error: %s was not made shallow' % commit)

    filter_refs(args.refs)

    if args.shrink:
        shrink_repo(git_dir)
        subprocess.check_call(['git', 'fsck', '--unreachable'])
51
52
def process_args():
    """Parse ``sys.argv`` and normalise the refs and revisions via git.

    Prints the help text and exits with status 2 when invoked without any
    arguments.

    Returns:
        The ``argparse`` namespace with:

        * ``refs`` -- full symbolic names of the refs to keep.  When no
          ``--ref`` was given this is every ref whose object type, or
          dereferenced object type, is ``commit``.  Names ending in
          ``/HEAD`` are always dropped.
        * ``revisions`` -- the output lines of ``git rev-parse`` for each
          requested revision with ``^{}`` appended.
        * ``shrink`` -- whether ``--shrink`` was given.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    description = 'Remove the history of the specified revisions, then optionally filter the available refs to those specified.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--ref', '-r',
        metavar='REF',
        action='append',
        dest='refs',
        help='remove all but the specified refs (cumulative)',
    )
    parser.add_argument(
        '--shrink', '-s',
        action='store_true',
        help='shrink the git repository by repacking and pruning',
    )
    parser.add_argument(
        'revisions',
        metavar='REVISION',
        nargs='+',
        help='a git revision/commit',
    )

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    def points_at_commit(refname, objecttype, deref_objecttype):
        # Invoked by get_all_refs with the three for-each-ref fields.
        return 'commit' in (objecttype, deref_objecttype)

    if args.refs:
        candidates = check_output(['git', 'rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
    else:
        candidates = get_all_refs(points_at_commit)

    args.refs = [name for name in candidates if not name.endswith('/HEAD')]

    peeled = ['%s^{}' % revision for revision in args.revisions]
    args.revisions = check_output(['git', 'rev-parse'] + peeled).splitlines()
    return args
74
75
def check_output(cmd, input=None):
    """Run ``cmd`` and return its standard output as text.

    Args:
        cmd: The command, in any form ``subprocess.check_output`` accepts.
        input: Optional text to feed to the command's standard input.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    # universal_newlines=True selects text mode for both input and output.
    return subprocess.check_output(cmd, input=input, universal_newlines=True)
78
79
def make_shallow(shallow_file, revisions, refs):
    """Append every history-intersection point to ``shallow_file``.

    Each revision produced by ``follow_history_intersections`` is announced
    on stdout and written to the file on its own line.
    """
    for commit in follow_history_intersections(revisions, refs):
        print("Processing %s" % commit)
        # The file is reopened and closed for every commit, so each entry is
        # on disk before the generator resumes and runs further git commands.
        # NOTE(review): presumably git needs to see the updated boundary on
        # the next iteration -- confirm before batching these writes.
        with open(shallow_file, 'a') as shallow:
            print(commit, file=shallow)
86
87
def get_all_refs(ref_filter=None):
    """List the names of the refs in this repository.

    Args:
        ref_filter: Optional callable taking ``(refname, objecttype,
            dereferenced_objecttype)``; only refs for which it returns a
            true value are kept.  Missing fields are passed as ``None``.

    Returns:
        A list of ref names in the order ``git for-each-ref`` printed them.
    """
    listing = check_output(['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])

    names = []
    for line in listing.splitlines():
        # Pad/truncate to exactly three fields so the filter can always be
        # called with three positional arguments.
        fields = tuple(iter_extend(line.rsplit('\t'), 3))
        if ref_filter and not ref_filter(*fields):
            continue
        names.append(fields[0])
    return names
96
97
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly ``length`` items of ``iterable``.

    Shorter inputs are padded with ``obj``; longer inputs are truncated.
    """
    padded = itertools.chain(iterable, itertools.repeat(obj))
    return itertools.islice(padded, length)
101
102
def filter_refs(refs):
    """Delete every ref in the repository that is not listed in ``refs``."""
    unwanted = set(get_all_refs()).difference(refs)
    if not unwanted:
        return

    # xargs -0 reads NUL-terminated names, and -n 1 runs one
    # ``git update-ref -d --no-deref <ref>`` per name.
    nul_terminated = ''.join(name + '\0' for name in unwanted)
    check_output(['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
                 input=nul_terminated)
110
111
def follow_history_intersections(revisions, refs):
    """Yield each point where the history of ``revisions`` meets ``refs``.

    Starting from ``revisions``, every not-yet-seen revision is yielded once.
    For each of its parents, the merge base between that parent and each
    independent ref tip is queued for the same treatment.

    The order of the git invocations is significant and must be preserved:
    a revision's parents are resolved *before* it is yielded, and the set of
    independent ref tips is recomputed on every iteration.
    NOTE(review): presumably this is because the consumer (``make_shallow``)
    writes each yielded revision to the shallow file, which changes what git
    reports afterwards -- confirm before reordering or hoisting.
    """
    pending = collections.deque(revisions)
    visited = set()

    for commit in iter_except(pending.popleft, IndexError):
        if commit in visited:
            continue

        parent_ids = check_output(['git', 'rev-parse', '%s^@' % commit]).splitlines()

        yield commit
        visited.add(commit)

        if not parent_ids:
            # Root commit (or already at a boundary): nothing to follow.
            continue

        tips = check_output(['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()
        # product() walks the pairs in the same order as nested loops with
        # the parents on the outside.
        for parent, tip in itertools.product(parent_ids, tips):
            print("Checking %s vs %s" % (parent, tip))
            try:
                base = check_output(['git', 'merge-base', parent, tip]).rstrip()
            except subprocess.CalledProcessError:
                # git reported no merge base for this pair (non-zero exit).
                continue
            pending.append(base)
139
140
def iter_except(func, exception, start=None):
    """Yield the results of calling ``func`` until ``exception`` is raised.

    Args:
        func: Zero-argument callable invoked repeatedly.
        exception: Exception type (or tuple of types) that ends the
            iteration silently.
        start: Optional zero-argument callable used for the first value
            only, before ``func`` takes over.
    """
    produce = func if start is None else start
    try:
        while True:
            yield produce()
            # After the first value every subsequent one comes from func.
            produce = func
    except exception:
        return
150
151
def shrink_repo(git_dir):
    """Repack and prune the repository at ``git_dir`` to reclaim space.

    Expires unreachable reflog entries, repacks, removes
    ``objects/info/alternates`` if it exists, then prunes everything
    unreachable immediately.

    Raises:
        subprocess.CalledProcessError: if any git command fails.
    """
    run = subprocess.check_call

    run(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    run(['git', 'repack', '-ad'])

    # NOTE(review): presumably safe to drop only because the repack above
    # has copied borrowed objects locally -- confirm.
    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')
    try:
        os.unlink(alternates)
    except FileNotFoundError:
        # No alternates configured; nothing to remove.
        pass

    run(['git', 'prune', '--expire', 'now'])
162
163
# Script entry point: only run when executed directly, not when imported.
if __name__ == '__main__':
    main()
diff --git a/bitbake/lib/bb/tests/fetch.py b/bitbake/lib/bb/tests/fetch.py
index 0fd2c02163..510071d25d 100644
--- a/bitbake/lib/bb/tests/fetch.py
+++ b/bitbake/lib/bb/tests/fetch.py
@@ -852,3 +852,130 @@ class FetchCheckStatusTest(FetcherTest):
852 self.assertTrue(ret, msg="URI %s, can't check status" % (u)) 852 self.assertTrue(ret, msg="URI %s, can't check status" % (u))
853 853
854 connection_cache.close_connections() 854 connection_cache.close_connections()
855
856
class GitMakeShallowTest(FetcherTest):
    """Run bin/git-make-shallow against small throwaway git repositories."""

    # Top of the bitbake tree, three levels above this test module.
    bitbake_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..')
    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')

    def setUp(self):
        """Create an empty repository whose initial branch is ``master``."""
        FetcherTest.setUp(self)
        self.gitdir = os.path.join(self.tempdir, 'gitshallow')
        bb.utils.mkdirhier(self.gitdir)
        bb.process.run('git init', cwd=self.gitdir)
        # The tests below check out 'master' by name, but `git init` honours
        # the user's init.defaultBranch setting.  Point the still-unborn HEAD
        # at master explicitly so the tests do not depend on local config.
        self.git('symbolic-ref HEAD refs/heads/master')

    def assertRefs(self, expected_refs):
        """Assert that the repository contains exactly ``expected_refs``."""
        actual_refs = self.git(['for-each-ref', '--format=%(refname)']).splitlines()
        # Expand short names (e.g. 'master') to full ref names for comparison.
        full_expected = self.git(['rev-parse', '--symbolic-full-name'] + expected_refs).splitlines()
        self.assertEqual(sorted(full_expected), sorted(actual_refs))

    def assertRevCount(self, expected_count, args=None):
        """Assert that ``git rev-list <args>`` lists ``expected_count`` revisions.

        ``args`` defaults to ``['HEAD']``.
        """
        if args is None:
            args = ['HEAD']
        revs = self.git(['rev-list'] + args)
        actual_count = len(revs.splitlines())
        self.assertEqual(expected_count, actual_count, msg='Object count `%d` is not the expected `%d`' % (actual_count, expected_count))

    def git(self, cmd):
        """Run a git command (string or argument list) in the test repo.

        Returns the first element of the ``bb.process.run`` result.
        """
        if isinstance(cmd, str):
            cmd = 'git ' + cmd
        else:
            cmd = ['git'] + cmd
        return bb.process.run(cmd, cwd=self.gitdir)[0]

    def make_shallow(self, args=None):
        """Invoke git-make-shallow in the test repo; ``args`` defaults to ``['HEAD']``."""
        if args is None:
            args = ['HEAD']
        return bb.process.run([self.make_shallow_path] + args, cwd=self.gitdir)

    def add_empty_file(self, path, msg=None):
        """Create an empty file at ``path`` and commit it.

        The commit message defaults to the path itself.
        """
        if msg is None:
            msg = path
        with open(os.path.join(self.gitdir, path), 'w'):
            pass
        self.git(['add', path])
        self.git(['commit', '-m', msg, path])

    def test_make_shallow_single_branch_no_merge(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.assertRevCount(2)
        self.make_shallow()
        self.assertRevCount(1)

    def test_make_shallow_single_branch_one_merge(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.git('checkout -b a_branch')
        self.add_empty_file('c')
        self.git('checkout master')
        self.add_empty_file('d')
        self.git('merge --no-ff --no-edit a_branch')
        self.git('branch -d a_branch')
        self.add_empty_file('e')
        self.assertRevCount(6)
        self.make_shallow(['HEAD~2'])
        self.assertRevCount(5)

    def test_make_shallow_at_merge(self):
        self.add_empty_file('a')
        self.git('checkout -b a_branch')
        self.add_empty_file('b')
        self.git('checkout master')
        self.git('merge --no-ff --no-edit a_branch')
        self.git('branch -d a_branch')
        self.assertRevCount(3)
        self.make_shallow()
        self.assertRevCount(1)

    def test_make_shallow_annotated_tag(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.git('tag -a -m a_tag a_tag')
        self.assertRevCount(2)
        self.make_shallow(['a_tag'])
        self.assertRevCount(1)

    def test_make_shallow_multi_ref(self):
        self.add_empty_file('a')
        self.add_empty_file('b')
        self.git('checkout -b a_branch')
        self.add_empty_file('c')
        self.git('checkout master')
        self.add_empty_file('d')
        self.git('checkout -b a_branch_2')
        self.add_empty_file('a_tag')
        self.git('tag a_tag')
        self.git('checkout master')
        self.git('branch -D a_branch_2')
        self.add_empty_file('e')
        self.assertRevCount(6, ['--all'])
        self.make_shallow()
        self.assertRevCount(5, ['--all'])

    def test_make_shallow_multi_ref_trim(self):
        self.add_empty_file('a')
        self.git('checkout -b a_branch')
        self.add_empty_file('c')
        self.git('checkout master')
        self.assertRevCount(1)
        self.assertRevCount(2, ['--all'])
        self.assertRefs(['master', 'a_branch'])
        self.make_shallow(['-r', 'master', 'HEAD'])
        self.assertRevCount(1, ['--all'])
        self.assertRefs(['master'])

    def test_make_shallow_noop(self):
        self.add_empty_file('a')
        self.assertRevCount(1)
        self.make_shallow()
        self.assertRevCount(1)

    # The network test is only defined when BB_SKIP_NETTESTS is not "yes".
    if os.environ.get("BB_SKIP_NETTESTS") == "yes":
        print("Unset BB_SKIP_NETTESTS to run network tests")
    else:
        def test_make_shallow_bitbake(self):
            self.git('remote add origin https://github.com/openembedded/bitbake')
            self.git('fetch --tags origin')
            orig_revs = len(self.git('rev-list --all').splitlines())
            self.make_shallow(['refs/tags/1.10.0'])
            # 1746 is the number of revisions expected to disappear when the
            # history behind tag 1.10.0 is cut.
            self.assertRevCount(orig_revs - 1746, ['--all'])
981 self.assertRevCount(orig_revs - 1746, ['--all'])