summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilip Lorenz <philip.lorenz@bmw.de>2024-02-27 07:18:08 +0100
committerSteve Sakoman <steve@sakoman.com>2024-03-01 08:00:58 -1000
commitb3e316e8486e5462c3c71a9c8248779a5c253385 (patch)
treeec8ef8021bc3c63491b0a7ee4663f65c3c20d8f2
parentec62e15f12f3c69d1e9b484cb2cbb8f84fdf73d8 (diff)
downloadpoky-b3e316e8486e5462c3c71a9c8248779a5c253385.tar.gz
bitbake: fetch2: Ensure that git LFS objects are available
The current implementation only performs a git lfs fetch alongside a regular git fetch. This causes issues when the downloaded revision is already part of the fetched repository (e.g. because of moving back in history or the updated revision already being part of the repository at the time of the initial clone). Fix this by explicitly checking whether the required LFS objects are available in the download directory before confirming that a downloaded repository is up-to-date. This issue previously went unnoticed as git lfs would silently fetch the missing objects during the `unpack` task. With network isolation turned on, this no longer works, and unpacking fails. (cherry picked from commit cfae1556bf671acec119a6c8bbc4b667a856b9ae) (Bitbake rev: 40fd5f4eef7460ca67f32cfce8e229e67e1ff607) Signed-off-by: Philip Lorenz <philip.lorenz@bmw.de> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org> Signed-off-by: Philip Lorenz <philip.lorenz@bmw.de> Signed-off-by: Steve Sakoman <steve@sakoman.com>
-rw-r--r--bitbake/lib/bb/fetch2/git.py45
-rw-r--r--bitbake/lib/bb/tests/fetch.py51
2 files changed, 90 insertions, 6 deletions
diff --git a/bitbake/lib/bb/fetch2/git.py b/bitbake/lib/bb/fetch2/git.py
index 4d6e57ade7..9ecc855af8 100644
--- a/bitbake/lib/bb/fetch2/git.py
+++ b/bitbake/lib/bb/fetch2/git.py
@@ -307,7 +307,10 @@ class Git(FetchMethod):
307 return ud.clonedir 307 return ud.clonedir
308 308
def need_update(self, ud, d):
    """
    Tell whether anything backing this URL has to be (re)downloaded.

    The checks run in a fixed order and stop at the first one that reports
    a truthy result: clone directory, shallow tarball, regular tarball and
    finally the LFS objects. The value of the deciding check is returned.
    """
    outdated = self.clonedir_need_update(ud, d)
    if not outdated:
        outdated = self.shallow_tarball_need_update(ud)
    if not outdated:
        outdated = self.tarball_need_update(ud)
    if not outdated:
        outdated = self.lfs_need_update(ud, d)
    return outdated
311 314
312 def clonedir_need_update(self, ud, d): 315 def clonedir_need_update(self, ud, d):
313 if not os.path.exists(ud.clonedir): 316 if not os.path.exists(ud.clonedir):
@@ -319,6 +322,15 @@ class Git(FetchMethod):
319 return True 322 return True
320 return False 323 return False
321 324
def lfs_need_update(self, ud, d):
    """
    Tell whether LFS objects still have to be fetched into the clone directory.

    Returns True when the clone directory itself needs an update, or when
    _lfs_objects_downloaded() reports missing objects for at least one of
    the names in ud.names. Returns False otherwise.
    """
    # A clone directory that needs updating always counts as needing an update here too.
    if self.clonedir_need_update(ud, d):
        return True

    # Stop at the first name whose LFS objects are not all present.
    return any(
        not self._lfs_objects_downloaded(ud, d, revname, ud.clonedir)
        for revname in ud.names
    )
333
322 def clonedir_need_shallow_revs(self, ud, d): 334 def clonedir_need_shallow_revs(self, ud, d):
323 for rev in ud.shallow_revs: 335 for rev in ud.shallow_revs:
324 try: 336 try:
@@ -406,7 +418,7 @@ class Git(FetchMethod):
406 if missing_rev: 418 if missing_rev:
407 raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev) 419 raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)
408 420
409 if self._contains_lfs(ud, d, ud.clonedir) and self._need_lfs(ud): 421 if self.lfs_need_update(ud, d):
410 # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching 422 # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching
411 # of all LFS blobs needed at the srcrev. 423 # of all LFS blobs needed at the srcrev.
412 # 424 #
@@ -649,6 +661,35 @@ class Git(FetchMethod):
649 raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output)) 661 raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
650 return output.split()[0] != "0" 662 return output.split()[0] != "0"
651 663
def _lfs_objects_downloaded(self, ud, d, name, wd):
    """
    Verifies whether the LFS objects for requested revisions have already been downloaded

    ud   -- fetch data; ud.basecmd, ud.revisions[name] and ud.parm are read
    d    -- datastore handed through to runfetchcmd()
    name -- key into ud.revisions selecting the revision to inspect
    wd   -- directory the git command runs in; LFS objects are looked up
            below wd/lfs/objects

    Returns True when LFS is not in use for this URL, when the revision
    lists no LFS-managed files, or when every OID listed by
    'git lfs ls-files' has a matching object file. Returns False as soon
    as one object file is missing.
    """
    # Bail out early if this repository doesn't use LFS
    if not self._need_lfs(ud) or not self._contains_lfs(ud, d, wd):
        return True

    # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file
    # existence.
    # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git
    cmd = "%s lfs ls-files -l %s" \
            % (ud.basecmd, ud.revisions[name])
    output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip()
    # Do not do any further matching if no objects are managed by LFS
    if not output:
        return True

    # Match all lines beginning with the hexadecimal OID
    oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)")
    for line in output.split("\n"):
        oid = oid_regex.search(line)
        if not oid:
            bb.warn("git lfs ls-files output '%s' did not match expected format." % line)
            # No OID could be extracted, so there is no object path to check
            # for this line. Skip it; dereferencing the failed match would
            # raise AttributeError.
            continue
        # Objects are stored as lfs/objects/<oid[0:2]>/<oid[2:4]>/<oid>
        if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))):
            return False

    return True
692
def _need_lfs(self, ud):
    """Return True when the URL's "lfs" parameter is "1"; a missing parameter counts as "1"."""
    lfs_param = ud.parm.get("lfs", "1")
    return lfs_param == "1"
654 695
diff --git a/bitbake/lib/bb/tests/fetch.py b/bitbake/lib/bb/tests/fetch.py
index 847a35602d..5aa3e464dd 100644
--- a/bitbake/lib/bb/tests/fetch.py
+++ b/bitbake/lib/bb/tests/fetch.py
@@ -6,6 +6,7 @@
6# SPDX-License-Identifier: GPL-2.0-only 6# SPDX-License-Identifier: GPL-2.0-only
7# 7#
8 8
9import contextlib
9import unittest 10import unittest
10import hashlib 11import hashlib
11import tempfile 12import tempfile
@@ -2182,10 +2183,14 @@ class GitLfsTest(FetcherTest):
2182 2183
2183 bb.utils.mkdirhier(self.srcdir) 2184 bb.utils.mkdirhier(self.srcdir)
2184 self.git_init(cwd=self.srcdir) 2185 self.git_init(cwd=self.srcdir)
2185 with open(os.path.join(self.srcdir, '.gitattributes'), 'wt') as attrs: 2186 self.commit_file('.gitattributes', '*.mp3 filter=lfs -text')
2186 attrs.write('*.mp3 filter=lfs -text') 2187
2187 self.git(['add', '.gitattributes'], cwd=self.srcdir) 2188 def commit_file(self, filename, content):
2188 self.git(['commit', '-m', "attributes", '.gitattributes'], cwd=self.srcdir) 2189 with open(os.path.join(self.srcdir, filename), "w") as f:
2190 f.write(content)
2191 self.git(["add", filename], cwd=self.srcdir)
2192 self.git(["commit", "-m", "Change"], cwd=self.srcdir)
2193 return self.git(["rev-parse", "HEAD"], cwd=self.srcdir).strip()
2189 2194
2190 def fetch(self, uri=None, download=True): 2195 def fetch(self, uri=None, download=True):
2191 uris = self.d.getVar('SRC_URI').split() 2196 uris = self.d.getVar('SRC_URI').split()
@@ -2206,6 +2211,44 @@ class GitLfsTest(FetcherTest):
2206 return unpacked_lfs_file 2211 return unpacked_lfs_file
2207 2212
2208 @skipIfNoGitLFS() 2213 @skipIfNoGitLFS()
def test_fetch_lfs_on_srcrev_change(self):
    """Test if fetch downloads missing LFS objects when a different revision within an existing repository is requested"""
    self.git(["lfs", "install", "--local"], cwd=self.srcdir)

    @contextlib.contextmanager
    def upstream_hidden():
        """Move the upstream repository aside so git lfs has nothing to pull from; restore it on exit"""
        backup_path = self.srcdir + ".bak"
        os.rename(self.srcdir, backup_path)
        try:
            yield
        finally:
            os.rename(backup_path, self.srcdir)

    def check_revision(revision, filename, expected):
        # Fetch while upstream is reachable, then unpack with upstream hidden.
        self.d.setVar('SRCREV', revision)
        fetcher, _ = self.fetch()

        with upstream_hidden():
            workdir = self.d.getVar('WORKDIR')
            fetcher.unpack(workdir)

            unpacked_path = os.path.join(workdir, "git", filename)
            with open(unpacked_path) as unpacked:
                self.assertEqual(unpacked.read(), expected)

    # Two revisions of the same LFS-tracked file
    older_rev = self.commit_file("a.mp3", "version 1")
    newer_rev = self.commit_file("a.mp3", "version 2")

    self.d.setVar('SRC_URI', "git://%s;protocol=file;lfs=1;branch=master" % self.srcdir)

    # First fetch the newest commit to seed the download folder and check that its LFS content
    # can be unpacked once the upstream repository has disappeared.
    check_revision(newer_rev, "a.mp3", "version 2")
    # Then go back in history: the LFS objects of the older revision must be fetched
    # into the download folder as well.
    check_revision(older_rev, "a.mp3", "version 1")
2250
2251 @skipIfNoGitLFS()
2209 @skipIfNoNetwork() 2252 @skipIfNoNetwork()
2210 def test_real_git_lfs_repo_succeeds_without_lfs_param(self): 2253 def test_real_git_lfs_repo_succeeds_without_lfs_param(self):
2211 self.d.setVar('SRC_URI', "git://gitlab.com/gitlab-examples/lfs.git;protocol=https;branch=master") 2254 self.d.setVar('SRC_URI', "git://gitlab.com/gitlab-examples/lfs.git;protocol=https;branch=master")