diff options
| author | Josip Sokcevic <sokcevic@chromium.org> | 2025-01-14 19:20:21 +0000 | 
|---|---|---|
| committer | LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2025-01-14 15:17:34 -0800 | 
| commit | 3405446a4eb382467ef539764f6a31869fd1ce43 (patch) | |
| tree | 86499dc07d72afc4bea0eea51cc063e2bcdaa700 | |
| parent | 41a27eb854b011f1506cbf984645df5a0f67ad00 (diff) | |
| download | git-repo-3405446a4eb382467ef539764f6a31869fd1ce43.tar.gz | |
gc: Add repack option
When a repository is partially cloned, blobs that are no longer needed are
never removed. To reclaim some disk space, allow the user to pass --repack,
which affects only repositories that use filter=blob:none and whose projects
are not shared.
Change-Id: I0608172c9eff82fb8a6b6ef703eb109fedb7a6cc
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447722
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Scott Lee <ddoman@google.com>
| -rw-r--r-- | subcmds/gc.py | 162 | 
1 files changed, 160 insertions, 2 deletions
| diff --git a/subcmds/gc.py b/subcmds/gc.py index 14d9675c..0831dc3c 100644 --- a/subcmds/gc.py +++ b/subcmds/gc.py | |||
| @@ -16,6 +16,7 @@ import os | |||
| 16 | from typing import List, Set | 16 | from typing import List, Set | 
| 17 | 17 | ||
| 18 | from command import Command | 18 | from command import Command | 
| 19 | from git_command import GitCommand | ||
| 19 | import platform_utils | 20 | import platform_utils | 
| 20 | from progress import Progress | 21 | from progress import Progress | 
| 21 | from project import Project | 22 | from project import Project | 
| @@ -23,7 +24,7 @@ from project import Project | |||
| 23 | 24 | ||
| 24 | class Gc(Command): | 25 | class Gc(Command): | 
| 25 | COMMON = True | 26 | COMMON = True | 
| 26 | helpSummary = "Cleaning up internal repo state." | 27 | helpSummary = "Cleaning up internal repo and Git state." | 
| 27 | helpUsage = """ | 28 | helpUsage = """ | 
| 28 | %prog | 29 | %prog | 
| 29 | """ | 30 | """ | 
| @@ -44,6 +45,13 @@ class Gc(Command): | |||
| 44 | action="store_true", | 45 | action="store_true", | 
| 45 | help="answer yes to all safe prompts", | 46 | help="answer yes to all safe prompts", | 
| 46 | ) | 47 | ) | 
| 48 | p.add_option( | ||
| 49 | "--repack", | ||
| 50 | default=False, | ||
| 51 | action="store_true", | ||
| 52 | help="repack all projects that use partial clone with " | ||
| 53 | "filter=blob:none", | ||
| 54 | ) | ||
| 47 | 55 | ||
| 48 | def _find_git_to_delete( | 56 | def _find_git_to_delete( | 
| 49 | self, to_keep: Set[str], start_dir: str | 57 | self, to_keep: Set[str], start_dir: str | 
| @@ -126,9 +134,159 @@ class Gc(Command): | |||
| 126 | 134 | ||
| 127 | return 0 | 135 | return 0 | 
| 128 | 136 | ||
| 137 | def _generate_promisor_files(self, pack_dir: str): | ||
| 138 | """Generates promisor files for all pack files in the given directory. | ||
| 139 | |||
| 140 | Promisor files are empty files with the same name as the corresponding | ||
| 141 | pack file but with the ".promisor" extension. They are used by Git. | ||
| 142 | """ | ||
| 143 | for root, _, files in platform_utils.walk(pack_dir): | ||
| 144 | for file in files: | ||
| 145 | if not file.endswith(".pack"): | ||
| 146 | continue | ||
| 147 | with open(os.path.join(root, f"{file[:-4]}promisor"), "w"): | ||
| 148 | pass | ||
| 149 | |||
| 150 | def repack_projects(self, projects: List[Project], opt): | ||
| 151 | repack_projects = [] | ||
| 152 | # Find all projects eligible for repacking: | ||
| 153 | # - can't be shared | ||
| 154 | # - have a specific fetch filter | ||
| 155 | for project in projects: | ||
| 156 | if project.config.GetBoolean("extensions.preciousObjects"): | ||
| 157 | continue | ||
| 158 | if not project.clone_depth: | ||
| 159 | continue | ||
| 160 | if project.manifest.CloneFilterForDepth != "blob:none": | ||
| 161 | continue | ||
| 162 | |||
| 163 | repack_projects.append(project) | ||
| 164 | |||
| 165 | if opt.dryrun: | ||
| 166 | print(f"Would have repacked {len(repack_projects)} projects.") | ||
| 167 | return 0 | ||
| 168 | |||
| 169 | pm = Progress( | ||
| 170 | "Repacking (this will take a while)", | ||
| 171 | len(repack_projects), | ||
| 172 | delay=False, | ||
| 173 | quiet=opt.quiet, | ||
| 174 | show_elapsed=True, | ||
| 175 | elide=True, | ||
| 176 | ) | ||
| 177 | |||
| 178 | for project in repack_projects: | ||
| 179 | pm.update(msg=f"{project.name}") | ||
| 180 | |||
| 181 | pack_dir = os.path.join(project.gitdir, "tmp_repo_repack") | ||
| 182 | if os.path.isdir(pack_dir): | ||
| 183 | platform_utils.rmtree(pack_dir) | ||
| 184 | os.mkdir(pack_dir) | ||
| 185 | |||
| 186 | # Prepare workspace for repacking - remove all unreachable refs and | ||
| 187 | # their objects. | ||
| 188 | GitCommand( | ||
| 189 | project, | ||
| 190 | ["reflog", "expire", "--expire-unreachable=all"], | ||
| 191 | verify_command=True, | ||
| 192 | ).Wait() | ||
| 193 | pm.update(msg=f"{project.name} | gc", inc=0) | ||
| 194 | GitCommand( | ||
| 195 | project, | ||
| 196 | ["gc"], | ||
| 197 | verify_command=True, | ||
| 198 | ).Wait() | ||
| 199 | |||
| 200 | # Get all objects that are reachable from the remote, and pack them. | ||
| 201 | pm.update(msg=f"{project.name} | generating list of objects", inc=0) | ||
| 202 | remote_objects_cmd = GitCommand( | ||
| 203 | project, | ||
| 204 | [ | ||
| 205 | "rev-list", | ||
| 206 | "--objects", | ||
| 207 | f"--remotes={project.remote.name}", | ||
| 208 | "--filter=blob:none", | ||
| 209 | ], | ||
| 210 | capture_stdout=True, | ||
| 211 | verify_command=True, | ||
| 212 | ) | ||
| 213 | |||
| 214 | # Get all local objects and pack them. | ||
| 215 | local_head_objects_cmd = GitCommand( | ||
| 216 | project, | ||
| 217 | ["rev-list", "--objects", "HEAD^{tree}"], | ||
| 218 | capture_stdout=True, | ||
| 219 | verify_command=True, | ||
| 220 | ) | ||
| 221 | local_objects_cmd = GitCommand( | ||
| 222 | project, | ||
| 223 | [ | ||
| 224 | "rev-list", | ||
| 225 | "--objects", | ||
| 226 | "--all", | ||
| 227 | "--reflog", | ||
| 228 | "--indexed-objects", | ||
| 229 | "--not", | ||
| 230 | f"--remotes={project.remote.name}", | ||
| 231 | ], | ||
| 232 | capture_stdout=True, | ||
| 233 | verify_command=True, | ||
| 234 | ) | ||
| 235 | |||
| 236 | remote_objects_cmd.Wait() | ||
| 237 | |||
| 238 | pm.update(msg=f"{project.name} | remote repack", inc=0) | ||
| 239 | GitCommand( | ||
| 240 | project, | ||
| 241 | ["pack-objects", os.path.join(pack_dir, "pack")], | ||
| 242 | input=remote_objects_cmd.stdout, | ||
| 243 | capture_stderr=True, | ||
| 244 | capture_stdout=True, | ||
| 245 | verify_command=True, | ||
| 246 | ).Wait() | ||
| 247 | |||
| 248 | # create promisor file for each pack file | ||
| 249 | self._generate_promisor_files(pack_dir) | ||
| 250 | |||
| 251 | local_head_objects_cmd.Wait() | ||
| 252 | local_objects_cmd.Wait() | ||
| 253 | |||
| 254 | pm.update(msg=f"{project.name} | local repack", inc=0) | ||
| 255 | GitCommand( | ||
| 256 | project, | ||
| 257 | ["pack-objects", os.path.join(pack_dir, "pack")], | ||
| 258 | input=local_head_objects_cmd.stdout + local_objects_cmd.stdout, | ||
| 259 | capture_stderr=True, | ||
| 260 | capture_stdout=True, | ||
| 261 | verify_command=True, | ||
| 262 | ).Wait() | ||
| 263 | |||
| 264 | # Swap the old pack directory with the new one. | ||
| 265 | platform_utils.rename( | ||
| 266 | os.path.join(project.objdir, "objects", "pack"), | ||
| 267 | os.path.join(project.objdir, "objects", "pack_old"), | ||
| 268 | ) | ||
| 269 | platform_utils.rename( | ||
| 270 | pack_dir, | ||
| 271 | os.path.join(project.objdir, "objects", "pack"), | ||
| 272 | ) | ||
| 273 | platform_utils.rmtree( | ||
| 274 | os.path.join(project.objdir, "objects", "pack_old") | ||
| 275 | ) | ||
| 276 | |||
| 277 | pm.end() | ||
| 278 | return 0 | ||
| 279 | |||
| 129 | def Execute(self, opt, args): | 280 | def Execute(self, opt, args): | 
| 130 | projects: List[Project] = self.GetProjects( | 281 | projects: List[Project] = self.GetProjects( | 
| 131 | args, all_manifests=not opt.this_manifest_only | 282 | args, all_manifests=not opt.this_manifest_only | 
| 132 | ) | 283 | ) | 
| 133 | 284 | ||
| 134 | return self.delete_unused_projects(projects, opt) | 285 | ret = self.delete_unused_projects(projects, opt) | 
| 286 | if ret != 0: | ||
| 287 | return ret | ||
| 288 | |||
| 289 | if not opt.repack: | ||
| 290 | return | ||
| 291 | |||
| 292 | return self.repack_projects(projects, opt) | ||
