summary | refs | log | tree | commit | diff | stats
path: root/subcmds
diff options
context:
space:
mode:
author: Gavin Mak <gavinmak@google.com> 2025-12-09 22:29:43 +0000
committer: LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> 2025-12-10 11:34:40 -0800
commit: b5991d71283c0c3a5d31e371d34c4d976bde63fd (patch)
tree: 2c5449c160b1b22805156510c47bcd90e9b2ef1e /subcmds
parent: 7f87c54043ce9a35a5bb60a09ee846f9d7070352 (diff)
download: git-repo-b5991d71283c0c3a5d31e371d34c4d976bde63fd.tar.gz
sync: Add heuristic warning for bloated shallow repositories (tag: v2.60)
For clone-depth="1" repositories that are dirty or have local commits, add a check at the end of sync to detect excessive git object accumulation. This prevents silent performance degradation and disk exhaustion in large prebuilts repos, where automatic GC is typically disabled (see https://gerrit.googlesource.com/git-repo/+/7f87c54043ce9a35a5bb60a09ee846f9d7070352).

Bug: 379111283
Change-Id: I376f38e1555cc6e906d852f6e63dc1c8f6331b4f
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/534701
Commit-Queue: Gavin Mak <gavinmak@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
Diffstat (limited to 'subcmds')
-rw-r--r-- subcmds/sync.py 104
1 files changed, 104 insertions, 0 deletions
diff --git a/subcmds/sync.py b/subcmds/sync.py
index f9500314d..b7cb1732a 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -87,6 +87,10 @@ _ONE_DAY_S = 24 * 60 * 60
87 87
88_REPO_ALLOW_SHALLOW = os.environ.get("REPO_ALLOW_SHALLOW") 88_REPO_ALLOW_SHALLOW = os.environ.get("REPO_ALLOW_SHALLOW")
89 89
90_BLOAT_PACK_COUNT_THRESHOLD = 10
91_BLOAT_SIZE_PACK_THRESHOLD_KB = 10 * 1024 * 1024 # 10 GiB in KiB
92_BLOAT_SIZE_GARBAGE_THRESHOLD_KB = 1 * 1024 * 1024 # 1 GiB in KiB
93
90logger = RepoLogger(__file__) 94logger = RepoLogger(__file__)
91 95
92 96
@@ -1371,6 +1375,104 @@ later is required to fix a server side protocol bug.
1371 t.join() 1375 t.join()
1372 pm.end() 1376 pm.end()
1373 1377
@classmethod
def _CheckOneBloatedProject(cls, project_index: int) -> Optional[str]:
    """Decide whether one project's git storage looks bloated.

    Projects that do not exist or have no worktree are ignored, as are
    clean projects whose HEAD matches the manifest revision. For the rest,
    the counters reported by `git count-objects -v` are compared against
    the module-level _BLOAT_* thresholds.

    Args:
        project_index: Index into the "projects" list held by the parallel
            context.

    Returns:
        The project name when the project is considered bloated, or None
        otherwise (including when any of the git queries raises).
    """
    candidate = cls.get_parallel_context()["projects"][project_index]

    if not (candidate.Exists and candidate.worktree):
        return None

    # Pristine checkouts are skipped: only trees that are dirty or whose
    # HEAD differs from the manifest revision get inspected.
    try:
        dirty = candidate.IsDirty(consider_untracked=True)

        expected_rev = candidate.GetRevisionId(candidate.bare_ref.all)
        actual_rev = candidate.work_git.rev_parse(HEAD)
        diverged = expected_rev != actual_rev

        if not dirty and not diverged:
            return None

        raw_stats = candidate.bare_git.count_objects("-v")
    except Exception:
        # Best effort: a project that cannot be queried is not reported.
        return None

    # Keep only the "key: <integer>" lines; anything else is ignored.
    counters = {}
    for entry in raw_stats.splitlines():
        label, separator, number = entry.partition(": ")
        if not separator:
            continue
        try:
            counters[label.strip()] = int(number.strip())
        except ValueError:
            continue

    too_many_big_packs = (
        counters.get("packs", 0) > _BLOAT_PACK_COUNT_THRESHOLD
        and counters.get("size-pack", 0) > _BLOAT_SIZE_PACK_THRESHOLD_KB
    )
    too_much_garbage = (
        counters.get("size-garbage", 0) > _BLOAT_SIZE_GARBAGE_THRESHOLD_KB
    )

    return candidate.name if (too_many_big_packs or too_much_garbage) else None
1432
def _CheckForBloatedProjects(self, projects, opt):
    """Warn about shallow projects that are piling up unoptimized git data.

    Every project that declares a clone depth is passed, in parallel, to
    _CheckOneBloatedProject. Each project name it returns produces a
    warning that is recorded in the git event log and sent to the logger.

    Args:
        projects: Candidate projects; those with a falsy clone_depth are
            ignored.
        opt: Parsed command options; `quiet` and `jobs` are read.
    """
    shallow = [proj for proj in projects if proj.clone_depth]
    if not shallow:
        return

    flagged = []
    progress = Progress(
        "Checking for bloat", len(shallow), delay=False, quiet=opt.quiet
    )

    def _Collect(pool, pm, results):
        # Each result is a project name or None; tick the progress
        # indicator once per checked project either way.
        for name in results:
            if name:
                flagged.append(name)
            pm.update(msg="")

    with self.ParallelContext():
        self.get_parallel_context()["projects"] = shallow
        self.ExecuteInParallel(
            opt.jobs,
            self._CheckOneBloatedProject,
            range(len(shallow)),
            callback=_Collect,
            output=progress,
            chunksize=1,
        )
    progress.end()

    for name in flagged:
        message = (
            f'warning: Project "{name}" is accumulating '
            'unoptimized data. Please run "repo sync --auto-gc" or '
            '"repo gc --repack" to clean up.'
        )
        self.git_event_log.ErrorEvent(message)
        logger.warning(message)
1475
1374 def _UpdateRepoProject(self, opt, manifest, errors): 1476 def _UpdateRepoProject(self, opt, manifest, errors):
1375 """Fetch the repo project and check for updates.""" 1477 """Fetch the repo project and check for updates."""
1376 if opt.local_only: 1478 if opt.local_only:
@@ -2002,6 +2104,8 @@ later is required to fix a server side protocol bug.
2002 "experience, sync the entire tree." 2104 "experience, sync the entire tree."
2003 ) 2105 )
2004 2106
2107 self._CheckForBloatedProjects(all_projects, opt)
2108
2005 if not opt.quiet: 2109 if not opt.quiet:
2006 print("repo sync has finished successfully.") 2110 print("repo sync has finished successfully.")
2007 2111