diff options
| author | Gavin Mak <gavinmak@google.com> | 2025-06-11 00:13:52 +0000 | 
|---|---|---|
| committer | LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2025-06-11 16:31:35 -0700 | 
| commit | 85352825ff3903fe17bad05476284930dbf12fd8 (patch) | |
| tree | 2970349ed8189906e4f62abde19df57c7b61114b | |
| parent | b262d0e4619c406a2708856ed312091d21c5bf39 (diff) | |
| download | git-repo-85352825ff3903fe17bad05476284930dbf12fd8.tar.gz | |
sync: Add scaffolding for interleaved sync

Prepare for an interleaved fetch and checkout mode for `repo sync`. The
goal of the new mode is to significantly speed up syncs by running fetch
and checkout operations in parallel for different projects, rather than
waiting for all fetches to complete before starting any checkouts.

Bug: 421935613
Change-Id: I8c66d1e790c7bba6280e409b95238c5e4e61a9c8
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/482821
Reviewed-by: Scott Lee <ddoman@google.com>
Commit-Queue: Gavin Mak <gavinmak@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
| -rw-r--r-- | man/repo-smartsync.1 | 5 | ||||
| -rw-r--r-- | man/repo-sync.1 | 5 | ||||
| -rw-r--r-- | subcmds/sync.py | 109 | 
3 files changed, 95 insertions, 24 deletions
| diff --git a/man/repo-smartsync.1 b/man/repo-smartsync.1 index a793b7bc..6e77f182 100644 --- a/man/repo-smartsync.1 +++ b/man/repo-smartsync.1 | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | .\" DO NOT MODIFY THIS FILE! It was generated by help2man. | 1 | .\" DO NOT MODIFY THIS FILE! It was generated by help2man. | 
| 2 | .TH REPO "1" "September 2024" "repo smartsync" "Repo Manual" | 2 | .TH REPO "1" "June 2025" "repo smartsync" "Repo Manual" | 
| 3 | .SH NAME | 3 | .SH NAME | 
| 4 | repo \- repo smartsync - manual page for repo smartsync | 4 | repo \- repo smartsync - manual page for repo smartsync | 
| 5 | .SH SYNOPSIS | 5 | .SH SYNOPSIS | 
| @@ -58,6 +58,9 @@ only update working tree, don't fetch | |||
| 58 | use the existing manifest checkout as\-is. (do not | 58 | use the existing manifest checkout as\-is. (do not | 
| 59 | update to the latest revision) | 59 | update to the latest revision) | 
| 60 | .TP | 60 | .TP | 
| 61 | \fB\-\-interleaved\fR | ||
| 62 | fetch and checkout projects in parallel (experimental) | ||
| 63 | .TP | ||
| 61 | \fB\-n\fR, \fB\-\-network\-only\fR | 64 | \fB\-n\fR, \fB\-\-network\-only\fR | 
| 62 | fetch only, don't update working tree | 65 | fetch only, don't update working tree | 
| 63 | .TP | 66 | .TP | 
| diff --git a/man/repo-sync.1 b/man/repo-sync.1 index 3ce9ec69..afaee2af 100644 --- a/man/repo-sync.1 +++ b/man/repo-sync.1 | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | .\" DO NOT MODIFY THIS FILE! It was generated by help2man. | 1 | .\" DO NOT MODIFY THIS FILE! It was generated by help2man. | 
| 2 | .TH REPO "1" "September 2024" "repo sync" "Repo Manual" | 2 | .TH REPO "1" "June 2025" "repo sync" "Repo Manual" | 
| 3 | .SH NAME | 3 | .SH NAME | 
| 4 | repo \- repo sync - manual page for repo sync | 4 | repo \- repo sync - manual page for repo sync | 
| 5 | .SH SYNOPSIS | 5 | .SH SYNOPSIS | 
| @@ -58,6 +58,9 @@ only update working tree, don't fetch | |||
| 58 | use the existing manifest checkout as\-is. (do not | 58 | use the existing manifest checkout as\-is. (do not | 
| 59 | update to the latest revision) | 59 | update to the latest revision) | 
| 60 | .TP | 60 | .TP | 
| 61 | \fB\-\-interleaved\fR | ||
| 62 | fetch and checkout projects in parallel (experimental) | ||
| 63 | .TP | ||
| 61 | \fB\-n\fR, \fB\-\-network\-only\fR | 64 | \fB\-n\fR, \fB\-\-network\-only\fR | 
| 62 | fetch only, don't update working tree | 65 | fetch only, don't update working tree | 
| 63 | .TP | 66 | .TP | 
| diff --git a/subcmds/sync.py b/subcmds/sync.py index 3a4151df..6e369a10 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py | |||
| @@ -424,6 +424,11 @@ later is required to fix a server side protocol bug. | |||
| 424 | "(do not update to the latest revision)", | 424 | "(do not update to the latest revision)", | 
| 425 | ) | 425 | ) | 
| 426 | p.add_option( | 426 | p.add_option( | 
| 427 | "--interleaved", | ||
| 428 | action="store_true", | ||
| 429 | help="fetch and checkout projects in parallel (experimental)", | ||
| 430 | ) | ||
| 431 | p.add_option( | ||
| 427 | "-n", | 432 | "-n", | 
| 428 | "--network-only", | 433 | "--network-only", | 
| 429 | action="store_true", | 434 | action="store_true", | 
| @@ -1772,8 +1777,6 @@ later is required to fix a server side protocol bug. | |||
| 1772 | e, | 1777 | e, | 
| 1773 | ) | 1778 | ) | 
| 1774 | 1779 | ||
| 1775 | err_event = multiprocessing.Event() | ||
| 1776 | |||
| 1777 | rp = manifest.repoProject | 1780 | rp = manifest.repoProject | 
| 1778 | rp.PreSync() | 1781 | rp.PreSync() | 
| 1779 | cb = rp.CurrentBranch | 1782 | cb = rp.CurrentBranch | 
| @@ -1825,6 +1828,64 @@ later is required to fix a server side protocol bug. | |||
| 1825 | all_manifests=not opt.this_manifest_only, | 1828 | all_manifests=not opt.this_manifest_only, | 
| 1826 | ) | 1829 | ) | 
| 1827 | 1830 | ||
| 1831 | if opt.interleaved: | ||
| 1832 | sync_method = self._SyncInterleaved | ||
| 1833 | else: | ||
| 1834 | sync_method = self._SyncPhased | ||
| 1835 | |||
| 1836 | sync_method( | ||
| 1837 | opt, | ||
| 1838 | args, | ||
| 1839 | errors, | ||
| 1840 | manifest, | ||
| 1841 | mp, | ||
| 1842 | all_projects, | ||
| 1843 | superproject_logging_data, | ||
| 1844 | ) | ||
| 1845 | |||
| 1846 | # Log the previous sync analysis state from the config. | ||
| 1847 | self.git_event_log.LogDataConfigEvents( | ||
| 1848 | mp.config.GetSyncAnalysisStateData(), "previous_sync_state" | ||
| 1849 | ) | ||
| 1850 | |||
| 1851 | # Update and log with the new sync analysis state. | ||
| 1852 | mp.config.UpdateSyncAnalysisState(opt, superproject_logging_data) | ||
| 1853 | self.git_event_log.LogDataConfigEvents( | ||
| 1854 | mp.config.GetSyncAnalysisStateData(), "current_sync_state" | ||
| 1855 | ) | ||
| 1856 | |||
| 1857 | self._local_sync_state.PruneRemovedProjects() | ||
| 1858 | if self._local_sync_state.IsPartiallySynced(): | ||
| 1859 | logger.warning( | ||
| 1860 | "warning: Partial syncs are not supported. For the best " | ||
| 1861 | "experience, sync the entire tree." | ||
| 1862 | ) | ||
| 1863 | |||
| 1864 | if not opt.quiet: | ||
| 1865 | print("repo sync has finished successfully.") | ||
| 1866 | |||
| 1867 | def _SyncPhased( | ||
| 1868 | self, | ||
| 1869 | opt, | ||
| 1870 | args, | ||
| 1871 | errors, | ||
| 1872 | manifest, | ||
| 1873 | mp, | ||
| 1874 | all_projects, | ||
| 1875 | superproject_logging_data, | ||
| 1876 | ): | ||
| 1877 | """Sync projects by separating network and local operations. | ||
| 1878 | |||
| 1879 | This method performs sync in two distinct, sequential phases: | ||
| 1880 | 1. Network Phase: Fetches updates for all projects from their remotes. | ||
| 1881 | 2. Local Phase: Checks out the updated revisions into the local | ||
| 1882 | worktrees for all projects. | ||
| 1883 | |||
| 1884 | This approach ensures that the local work-tree is not modified until | ||
| 1885 | all network operations are complete, providing a transactional-like | ||
| 1886 | safety net for the checkout state. | ||
| 1887 | """ | ||
| 1888 | err_event = multiprocessing.Event() | ||
| 1828 | err_network_sync = False | 1889 | err_network_sync = False | 
| 1829 | err_update_projects = False | 1890 | err_update_projects = False | 
| 1830 | err_update_linkfiles = False | 1891 | err_update_linkfiles = False | 
| @@ -1942,26 +2003,30 @@ later is required to fix a server side protocol bug. | |||
| 1942 | ) | 2003 | ) | 
| 1943 | raise SyncError(aggregate_errors=errors) | 2004 | raise SyncError(aggregate_errors=errors) | 
| 1944 | 2005 | ||
| 1945 | # Log the previous sync analysis state from the config. | 2006 | def _SyncInterleaved( | 
| 1946 | self.git_event_log.LogDataConfigEvents( | 2007 | self, | 
| 1947 | mp.config.GetSyncAnalysisStateData(), "previous_sync_state" | 2008 | opt, | 
| 1948 | ) | 2009 | args, | 
| 1949 | 2010 | errors, | |
| 1950 | # Update and log with the new sync analysis state. | 2011 | manifest, | 
| 1951 | mp.config.UpdateSyncAnalysisState(opt, superproject_logging_data) | 2012 | mp, | 
| 1952 | self.git_event_log.LogDataConfigEvents( | 2013 | all_projects, | 
| 1953 | mp.config.GetSyncAnalysisStateData(), "current_sync_state" | 2014 | superproject_logging_data, | 
| 1954 | ) | 2015 | ): | 
| 1955 | 2016 | """Sync projects by performing network and local operations in parallel. | |
| 1956 | self._local_sync_state.PruneRemovedProjects() | 2017 | |
| 1957 | if self._local_sync_state.IsPartiallySynced(): | 2018 | This method processes each project (or groups of projects that share git | 
| 1958 | logger.warning( | 2019 | objects) independently. For each project, it performs the fetch and | 
| 1959 | "warning: Partial syncs are not supported. For the best " | 2020 | checkout operations back-to-back. These independent tasks are run in | 
| 1960 | "experience, sync the entire tree." | 2021 | parallel. | 
| 1961 | ) | 2022 | |
| 1962 | 2023 | It respects two constraints for correctness: | |
| 1963 | if not opt.quiet: | 2024 | 1. Projects in nested directories (e.g. 'foo' and 'foo/bar') are | 
| 1964 | print("repo sync has finished successfully.") | 2025 | processed in hierarchical order. | 
| 2026 | 2. Projects that share git objects are processed serially to prevent | ||
| 2027 | race conditions. | ||
| 2028 | """ | ||
| 2029 | raise NotImplementedError("Interleaved sync is not implemented yet.") | ||
| 1965 | 2030 | ||
| 1966 | 2031 | ||
| 1967 | def _PostRepoUpgrade(manifest, quiet=False): | 2032 | def _PostRepoUpgrade(manifest, quiet=False): | 
