diff options
| author | Mike Frysinger <vapier@google.com> | 2021-03-01 02:06:10 -0500 |
|---|---|---|
| committer | Mike Frysinger <vapier@google.com> | 2021-03-01 15:58:06 +0000 |
| commit | 13cb7f799dc61093ed69726093a5af8bf48c65d1 (patch) | |
| tree | 8b20fcf594b0e07676dea41792d58d81d16efdd4 /subcmds | |
| parent | 819c73954f0f0d80afda86e871000e7521bfd982 (diff) | |
| download | git-repo-13cb7f799dc61093ed69726093a5af8bf48c65d1.tar.gz | |
forall: greatly speed up processing overhead (tag: v2.13)
With the recent commit 0501b29e7ae072e0b10ea9ddd913ec6d5975f690
("status: Use multiprocessing for `repo status -j<num>` instead of
threading"), the limitation with project serialization no longer
applies. It turns out that ad-hoc logic is expensive. In the CrOS
checkout (~1000 projects w/8 jobs by default), it adds ~7sec of
overhead to all invocations. With a fast nop run:
    `time repo forall -j8 -c true`
This goes from ~11sec to ~4sec -- more than a 50% speedup.
Change-Id: Ie6bcccd21eef20440692751b7ebd36c890d5bbcc
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/298724
Reviewed-by: Michael Mortensen <mmortensen@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
Diffstat (limited to 'subcmds')
| -rw-r--r-- | subcmds/forall.py | 59 |
1 files changed, 17 insertions, 42 deletions
diff --git a/subcmds/forall.py b/subcmds/forall.py
index 24fec5ce..3e879fb9 100644
--- a/subcmds/forall.py
+++ b/subcmds/forall.py
| @@ -158,31 +158,6 @@ without iterating through the remaining projects. | |||
| 158 | def WantPager(self, opt): | 158 | def WantPager(self, opt): |
| 159 | return opt.project_header and opt.jobs == 1 | 159 | return opt.project_header and opt.jobs == 1 |
| 160 | 160 | ||
| 161 | def _SerializeProject(self, project): | ||
| 162 | """ Serialize a project._GitGetByExec instance. | ||
| 163 | |||
| 164 | project._GitGetByExec is not pickle-able. Instead of trying to pass it | ||
| 165 | around between processes, make a dict ourselves containing only the | ||
| 166 | attributes that we need. | ||
| 167 | |||
| 168 | """ | ||
| 169 | if not self.manifest.IsMirror: | ||
| 170 | lrev = project.GetRevisionId() | ||
| 171 | else: | ||
| 172 | lrev = None | ||
| 173 | return { | ||
| 174 | 'name': project.name, | ||
| 175 | 'relpath': project.relpath, | ||
| 176 | 'remote_name': project.remote.name, | ||
| 177 | 'lrev': lrev, | ||
| 178 | 'rrev': project.revisionExpr, | ||
| 179 | 'annotations': dict((a.name, a.value) for a in project.annotations), | ||
| 180 | 'gitdir': project.gitdir, | ||
| 181 | 'worktree': project.worktree, | ||
| 182 | 'upstream': project.upstream, | ||
| 183 | 'dest_branch': project.dest_branch, | ||
| 184 | } | ||
| 185 | |||
| 186 | def ValidateOptions(self, opt, args): | 161 | def ValidateOptions(self, opt, args): |
| 187 | if not opt.command: | 162 | if not opt.command: |
| 188 | self.Usage() | 163 | self.Usage() |
| @@ -242,7 +217,7 @@ without iterating through the remaining projects. | |||
| 242 | with multiprocessing.Pool(opt.jobs, InitWorker) as pool: | 217 | with multiprocessing.Pool(opt.jobs, InitWorker) as pool: |
| 243 | results_it = pool.imap( | 218 | results_it = pool.imap( |
| 244 | functools.partial(DoWorkWrapper, mirror, opt, cmd, shell, config), | 219 | functools.partial(DoWorkWrapper, mirror, opt, cmd, shell, config), |
| 245 | enumerate(self._SerializeProject(x) for x in projects), | 220 | enumerate(projects), |
| 246 | chunksize=WORKER_BATCH_SIZE) | 221 | chunksize=WORKER_BATCH_SIZE) |
| 247 | first = True | 222 | first = True |
| 248 | for (r, output) in results_it: | 223 | for (r, output) in results_it: |
| @@ -292,7 +267,7 @@ def DoWorkWrapper(mirror, opt, cmd, shell, config, args): | |||
| 292 | try: | 267 | try: |
| 293 | return DoWork(project, mirror, opt, cmd, shell, cnt, config) | 268 | return DoWork(project, mirror, opt, cmd, shell, cnt, config) |
| 294 | except KeyboardInterrupt: | 269 | except KeyboardInterrupt: |
| 295 | print('%s: Worker interrupted' % project['name']) | 270 | print('%s: Worker interrupted' % project.name) |
| 296 | raise WorkerKeyboardInterrupt() | 271 | raise WorkerKeyboardInterrupt() |
| 297 | 272 | ||
| 298 | 273 | ||
| @@ -304,22 +279,22 @@ def DoWork(project, mirror, opt, cmd, shell, cnt, config): | |||
| 304 | val = '' | 279 | val = '' |
| 305 | env[name] = val | 280 | env[name] = val |
| 306 | 281 | ||
| 307 | setenv('REPO_PROJECT', project['name']) | 282 | setenv('REPO_PROJECT', project.name) |
| 308 | setenv('REPO_PATH', project['relpath']) | 283 | setenv('REPO_PATH', project.relpath) |
| 309 | setenv('REPO_REMOTE', project['remote_name']) | 284 | setenv('REPO_REMOTE', project.remote.name) |
| 310 | setenv('REPO_LREV', project['lrev']) | 285 | setenv('REPO_LREV', '' if mirror else project.GetRevisionId()) |
| 311 | setenv('REPO_RREV', project['rrev']) | 286 | setenv('REPO_RREV', project.revisionExpr) |
| 312 | setenv('REPO_UPSTREAM', project['upstream']) | 287 | setenv('REPO_UPSTREAM', project.upstream) |
| 313 | setenv('REPO_DEST_BRANCH', project['dest_branch']) | 288 | setenv('REPO_DEST_BRANCH', project.dest_branch) |
| 314 | setenv('REPO_I', str(cnt + 1)) | 289 | setenv('REPO_I', str(cnt + 1)) |
| 315 | for name in project['annotations']: | 290 | for annotation in project.annotations: |
| 316 | setenv("REPO__%s" % (name), project['annotations'][name]) | 291 | setenv("REPO__%s" % (annotation.name), annotation.value) |
| 317 | 292 | ||
| 318 | if mirror: | 293 | if mirror: |
| 319 | setenv('GIT_DIR', project['gitdir']) | 294 | setenv('GIT_DIR', project.gitdir) |
| 320 | cwd = project['gitdir'] | 295 | cwd = project.gitdir |
| 321 | else: | 296 | else: |
| 322 | cwd = project['worktree'] | 297 | cwd = project.worktree |
| 323 | 298 | ||
| 324 | if not os.path.exists(cwd): | 299 | if not os.path.exists(cwd): |
| 325 | # Allow the user to silently ignore missing checkouts so they can run on | 300 | # Allow the user to silently ignore missing checkouts so they can run on |
| @@ -330,7 +305,7 @@ def DoWork(project, mirror, opt, cmd, shell, cnt, config): | |||
| 330 | output = '' | 305 | output = '' |
| 331 | if ((opt.project_header and opt.verbose) | 306 | if ((opt.project_header and opt.verbose) |
| 332 | or not opt.project_header): | 307 | or not opt.project_header): |
| 333 | output = 'skipping %s/' % project['relpath'] | 308 | output = 'skipping %s/' % project.relpath |
| 334 | return (1, output) | 309 | return (1, output) |
| 335 | 310 | ||
| 336 | if opt.verbose: | 311 | if opt.verbose: |
| @@ -350,9 +325,9 @@ def DoWork(project, mirror, opt, cmd, shell, cnt, config): | |||
| 350 | out = ForallColoring(config) | 325 | out = ForallColoring(config) |
| 351 | out.redirect(buf) | 326 | out.redirect(buf) |
| 352 | if mirror: | 327 | if mirror: |
| 353 | project_header_path = project['name'] | 328 | project_header_path = project.name |
| 354 | else: | 329 | else: |
| 355 | project_header_path = project['relpath'] | 330 | project_header_path = project.relpath |
| 356 | out.project('project %s/' % project_header_path) | 331 | out.project('project %s/' % project_header_path) |
| 357 | out.nl() | 332 | out.nl() |
| 358 | buf.write(output) | 333 | buf.write(output) |
