diff options
| author | Gavin Mak <gavinmak@google.com> | 2025-12-09 22:29:43 +0000 |
|---|---|---|
| committer | LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2025-12-10 11:34:40 -0800 |
| commit | b5991d71283c0c3a5d31e371d34c4d976bde63fd (patch) | |
| tree | 2c5449c160b1b22805156510c47bcd90e9b2ef1e /subcmds/sync.py | |
| parent | 7f87c54043ce9a35a5bb60a09ee846f9d7070352 (diff) | |
| download | git-repo-b5991d71283c0c3a5d31e371d34c4d976bde63fd.tar.gz | |
sync: Add heuristic warning for bloated shallow repositories (v2.60)
For clone-depth="1" repositories that are dirty or have local commits,
add a check at the end of sync to detect excessive git object
accumulation.
This prevents silent performance degradation and disk exhaustion in
large prebuilts repos where automatic GC is typically disabled as of
https://gerrit.googlesource.com/git-repo/+/7f87c54043ce9a35a5bb60a09ee846f9d7070352
Bug: 379111283
Change-Id: I376f38e1555cc6e906d852f6e63dc1c8f6331b4f
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/534701
Commit-Queue: Gavin Mak <gavinmak@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
Diffstat (limited to 'subcmds/sync.py')
| -rw-r--r-- | subcmds/sync.py | 104 |
1 file changed, 104 insertions, 0 deletions
diff --git a/subcmds/sync.py b/subcmds/sync.py
index f9500314d..b7cb1732a 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py | |||
| @@ -87,6 +87,10 @@ _ONE_DAY_S = 24 * 60 * 60 | |||
| 87 | 87 | ||
| 88 | _REPO_ALLOW_SHALLOW = os.environ.get("REPO_ALLOW_SHALLOW") | 88 | _REPO_ALLOW_SHALLOW = os.environ.get("REPO_ALLOW_SHALLOW") |
| 89 | 89 | ||
| 90 | _BLOAT_PACK_COUNT_THRESHOLD = 10 | ||
| 91 | _BLOAT_SIZE_PACK_THRESHOLD_KB = 10 * 1024 * 1024 # 10 GiB in KiB | ||
| 92 | _BLOAT_SIZE_GARBAGE_THRESHOLD_KB = 1 * 1024 * 1024 # 1 GiB in KiB | ||
| 93 | |||
| 90 | logger = RepoLogger(__file__) | 94 | logger = RepoLogger(__file__) |
| 91 | 95 | ||
| 92 | 96 | ||
| @@ -1371,6 +1375,104 @@ later is required to fix a server side protocol bug. | |||
| 1371 | t.join() | 1375 | t.join() |
| 1372 | pm.end() | 1376 | pm.end() |
| 1373 | 1377 | ||
| 1378 | @classmethod | ||
| 1379 | def _CheckOneBloatedProject(cls, project_index: int) -> Optional[str]: | ||
| 1380 | """Checks if a single project is bloated. | ||
| 1381 | |||
| 1382 | Args: | ||
| 1383 | project_index: The index of the project in the parallel context. | ||
| 1384 | |||
| 1385 | Returns: | ||
| 1386 | The name of the project if it is bloated, else None. | ||
| 1387 | """ | ||
| 1388 | project = cls.get_parallel_context()["projects"][project_index] | ||
| 1389 | |||
| 1390 | if not project.Exists or not project.worktree: | ||
| 1391 | return None | ||
| 1392 | |||
| 1393 | # Only check dirty or locally modified projects. These can't be | ||
| 1394 | # freshly cloned and will accumulate garbage. | ||
| 1395 | try: | ||
| 1396 | is_dirty = project.IsDirty(consider_untracked=True) | ||
| 1397 | |||
| 1398 | manifest_rev = project.GetRevisionId(project.bare_ref.all) | ||
| 1399 | head_rev = project.work_git.rev_parse(HEAD) | ||
| 1400 | has_local_commits = manifest_rev != head_rev | ||
| 1401 | |||
| 1402 | if not (is_dirty or has_local_commits): | ||
| 1403 | return None | ||
| 1404 | |||
| 1405 | output = project.bare_git.count_objects("-v") | ||
| 1406 | except Exception: | ||
| 1407 | return None | ||
| 1408 | |||
| 1409 | stats = {} | ||
| 1410 | for line in output.splitlines(): | ||
| 1411 | try: | ||
| 1412 | key, value = line.split(": ", 1) | ||
| 1413 | stats[key.strip()] = int(value.strip()) | ||
| 1414 | except ValueError: | ||
| 1415 | pass | ||
| 1416 | |||
| 1417 | pack_count = stats.get("packs", 0) | ||
| 1418 | size_pack_kb = stats.get("size-pack", 0) | ||
| 1419 | size_garbage_kb = stats.get("size-garbage", 0) | ||
| 1420 | |||
| 1421 | is_fragmented = ( | ||
| 1422 | pack_count > _BLOAT_PACK_COUNT_THRESHOLD | ||
| 1423 | and size_pack_kb > _BLOAT_SIZE_PACK_THRESHOLD_KB | ||
| 1424 | ) | ||
| 1425 | has_excessive_garbage = ( | ||
| 1426 | size_garbage_kb > _BLOAT_SIZE_GARBAGE_THRESHOLD_KB | ||
| 1427 | ) | ||
| 1428 | |||
| 1429 | if is_fragmented or has_excessive_garbage: | ||
| 1430 | return project.name | ||
| 1431 | return None | ||
| 1432 | |||
| 1433 | def _CheckForBloatedProjects(self, projects, opt): | ||
| 1434 | """Check for shallow projects that are accumulating unoptimized data. | ||
| 1435 | |||
| 1436 | For projects with clone-depth="1" that are dirty (have local changes), | ||
| 1437 | run 'git count-objects -v' and warn if the repository is accumulating | ||
| 1438 | excessive pack files or garbage. | ||
| 1439 | """ | ||
| 1440 | projects = [p for p in projects if p.clone_depth] | ||
| 1441 | if not projects: | ||
| 1442 | return | ||
| 1443 | |||
| 1444 | bloated_projects = [] | ||
| 1445 | pm = Progress( | ||
| 1446 | "Checking for bloat", len(projects), delay=False, quiet=opt.quiet | ||
| 1447 | ) | ||
| 1448 | |||
| 1449 | def _ProcessResults(pool, pm, results): | ||
| 1450 | for result in results: | ||
| 1451 | if result: | ||
| 1452 | bloated_projects.append(result) | ||
| 1453 | pm.update(msg="") | ||
| 1454 | |||
| 1455 | with self.ParallelContext(): | ||
| 1456 | self.get_parallel_context()["projects"] = projects | ||
| 1457 | self.ExecuteInParallel( | ||
| 1458 | opt.jobs, | ||
| 1459 | self._CheckOneBloatedProject, | ||
| 1460 | range(len(projects)), | ||
| 1461 | callback=_ProcessResults, | ||
| 1462 | output=pm, | ||
| 1463 | chunksize=1, | ||
| 1464 | ) | ||
| 1465 | pm.end() | ||
| 1466 | |||
| 1467 | for project_name in bloated_projects: | ||
| 1468 | warn_msg = ( | ||
| 1469 | f'warning: Project "{project_name}" is accumulating ' | ||
| 1470 | 'unoptimized data. Please run "repo sync --auto-gc" or ' | ||
| 1471 | '"repo gc --repack" to clean up.' | ||
| 1472 | ) | ||
| 1473 | self.git_event_log.ErrorEvent(warn_msg) | ||
| 1474 | logger.warning(warn_msg) | ||
| 1475 | |||
| 1374 | def _UpdateRepoProject(self, opt, manifest, errors): | 1476 | def _UpdateRepoProject(self, opt, manifest, errors): |
| 1375 | """Fetch the repo project and check for updates.""" | 1477 | """Fetch the repo project and check for updates.""" |
| 1376 | if opt.local_only: | 1478 | if opt.local_only: |
| @@ -2002,6 +2104,8 @@ later is required to fix a server side protocol bug. | |||
| 2002 | "experience, sync the entire tree." | 2104 | "experience, sync the entire tree." |
| 2003 | ) | 2105 | ) |
| 2004 | 2106 | ||
| 2107 | self._CheckForBloatedProjects(all_projects, opt) | ||
| 2108 | |||
| 2005 | if not opt.quiet: | 2109 | if not opt.quiet: |
| 2006 | print("repo sync has finished successfully.") | 2110 | print("repo sync has finished successfully.") |
| 2007 | 2111 | ||
