diff options
| -rw-r--r-- | manifest_xml.py | 58 | ||||
| -rw-r--r-- | tests/test_manifest_xml.py | 29 |
2 files changed, 74 insertions, 13 deletions
diff --git a/manifest_xml.py b/manifest_xml.py index 61b130cf..b27bf805 100644 --- a/manifest_xml.py +++ b/manifest_xml.py | |||
| @@ -117,6 +117,36 @@ def XmlInt(node, attr, default=None): | |||
| 117 | raise ManifestParseError(f'manifest: invalid {attr}="{value}" integer') | 117 | raise ManifestParseError(f'manifest: invalid {attr}="{value}" integer') |
| 118 | 118 | ||
| 119 | 119 | ||
| 120 | def normalize_url(url: str) -> str: | ||
| 121 | """Mutate input 'url' into normalized form: | ||
| 122 | |||
| 123 | * remove trailing slashes | ||
| 124 | * convert SCP-like syntax to SSH URL | ||
| 125 | |||
| 126 | Args: | ||
| 127 | url: URL to modify | ||
| 128 | |||
| 129 | Returns: | ||
| 130 | The normalized URL. | ||
| 131 | """ | ||
| 132 | |||
| 133 | url = url.rstrip("/") | ||
| 134 | parsed_url = urllib.parse.urlparse(url) | ||
| 135 | |||
| 136 | # This matches patterns like "git@github.com:foo/bar". | ||
| 137 | scp_like_url_re = r"^[^:]+@[^:]+:[^/]+/" | ||
| 138 | |||
| 139 | # If our URL is missing a schema and matches git's | ||
| 140 | # SCP-like syntax we should convert it to a proper | ||
| 141 | # SSH URL instead to make urljoin() happier. | ||
| 142 | # | ||
| 143 | # See: https://git-scm.com/docs/git-clone#URLS | ||
| 144 | if not parsed_url.scheme and re.match(scp_like_url_re, url): | ||
| 145 | return "ssh://" + url.replace(":", "/", 1) | ||
| 146 | |||
| 147 | return url | ||
| 148 | |||
| 149 | |||
| 120 | class _Default: | 150 | class _Default: |
| 121 | """Project defaults within the manifest.""" | 151 | """Project defaults within the manifest.""" |
| 122 | 152 | ||
| @@ -180,20 +210,22 @@ class _XmlRemote: | |||
| 180 | def _resolveFetchUrl(self): | 210 | def _resolveFetchUrl(self): |
| 181 | if self.fetchUrl is None: | 211 | if self.fetchUrl is None: |
| 182 | return "" | 212 | return "" |
| 183 | url = self.fetchUrl.rstrip("/") | 213 | |
| 184 | manifestUrl = self.manifestUrl.rstrip("/") | 214 | fetch_url = normalize_url(self.fetchUrl) |
| 185 | # urljoin will gets confused over quite a few things. The ones we care | 215 | manifest_url = normalize_url(self.manifestUrl) |
| 186 | # about here are: | 216 | |
| 187 | # * no scheme in the base url, like <hostname:port> | 217 | # urljoin doesn't like URLs with no scheme in the base URL |
| 188 | # We handle no scheme by replacing it with an obscure protocol, gopher | 218 | # such as file paths. We handle this by prefixing it with |
| 189 | # and then replacing it with the original when we are done. | 219 | # an obscure protocol, gopher, and replacing it with the |
| 190 | 220 | # original after urljoin | |
| 191 | if manifestUrl.find(":") != manifestUrl.find("/") - 1: | 221 | if manifest_url.find(":") != manifest_url.find("/") - 1: |
| 192 | url = urllib.parse.urljoin("gopher://" + manifestUrl, url) | 222 | fetch_url = urllib.parse.urljoin( |
| 193 | url = re.sub(r"^gopher://", "", url) | 223 | "gopher://" + manifest_url, fetch_url |
| 224 | ) | ||
| 225 | fetch_url = re.sub(r"^gopher://", "", fetch_url) | ||
| 194 | else: | 226 | else: |
| 195 | url = urllib.parse.urljoin(manifestUrl, url) | 227 | fetch_url = urllib.parse.urljoin(manifest_url, fetch_url) |
| 196 | return url | 228 | return fetch_url |
| 197 | 229 | ||
| 198 | def ToRemoteSpec(self, projectName): | 230 | def ToRemoteSpec(self, projectName): |
| 199 | fetchUrl = self.resolvedFetchUrl.rstrip("/") | 231 | fetchUrl = self.resolvedFetchUrl.rstrip("/") |
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py index 3fcf09fa..11c0c15e 100644 --- a/tests/test_manifest_xml.py +++ b/tests/test_manifest_xml.py | |||
| @@ -1128,3 +1128,32 @@ class ExtendProjectElementTests(ManifestParseTestCase): | |||
| 1128 | ) | 1128 | ) |
| 1129 | self.assertEqual(len(manifest.projects), 1) | 1129 | self.assertEqual(len(manifest.projects), 1) |
| 1130 | self.assertEqual(manifest.projects[0].upstream, "bar") | 1130 | self.assertEqual(manifest.projects[0].upstream, "bar") |
| 1131 | |||
| 1132 | |||
| 1133 | class NormalizeUrlTests(ManifestParseTestCase): | ||
| 1134 | """Tests for normalize_url() in manifest_xml.py""" | ||
| 1135 | |||
| 1136 | def test_has_trailing_slash(self): | ||
| 1137 | url = "http://foo.com/bar/baz/" | ||
| 1138 | self.assertEqual( | ||
| 1139 | "http://foo.com/bar/baz", manifest_xml.normalize_url(url) | ||
| 1140 | ) | ||
| 1141 | |||
| 1142 | def test_has_no_scheme(self): | ||
| 1143 | """Deal with cases where we have no scheme, but we also | ||
| 1144 | aren't dealing with the git SCP-like syntax | ||
| 1145 | """ | ||
| 1146 | url = "foo.com/baf/bat" | ||
| 1147 | self.assertEqual(url, manifest_xml.normalize_url(url)) | ||
| 1148 | |||
| 1149 | url = "git@foo.com/baf/bat" | ||
| 1150 | self.assertEqual(url, manifest_xml.normalize_url(url)) | ||
| 1151 | |||
| 1152 | url = "/file/path/here" | ||
| 1153 | self.assertEqual(url, manifest_xml.normalize_url(url)) | ||
| 1154 | |||
| 1155 | def test_has_no_scheme_matches_scp_like_syntax(self): | ||
| 1156 | url = "git@foo.com:bar/baf" | ||
| 1157 | self.assertEqual( | ||
| 1158 | "ssh://git@foo.com/bar/baf", manifest_xml.normalize_url(url) | ||
| 1159 | ) | ||
