From 88807c7062788254f654ea8c03427adc859321f0 Mon Sep 17 00:00:00 2001 From: Jason R. Coombs Date: Mon Apr 29 20:01:38 2024 -0400 Subject: [PATCH] Merge pull request #4332 from pypa/debt/package-index-vcs Modernize package_index VCS handling CVE: CVE-2024-6345 Upstream-Status: Backport [https://github.com/pypa/setuptools/commit/88807c7062788254f654ea8c03427adc859321f0] Signed-off-by: Soumya Sambu --- setup.cfg | 1 + setuptools/package_index.py | 145 ++++++++++++++------------ setuptools/tests/test_packageindex.py | 56 +++++----- 3 files changed, 106 insertions(+), 96 deletions(-) diff --git a/setup.cfg b/setup.cfg index edf9798..238d00a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,6 +65,7 @@ testing = sys_platform != "cygwin" jaraco.develop >= 7.21; python_version >= "3.9" and sys_platform != "cygwin" pytest-home >= 0.5 + pytest-subprocess testing-integration = pytest pytest-xdist diff --git a/setuptools/package_index.py b/setuptools/package_index.py index 271aa97..00a972d 100644 --- a/setuptools/package_index.py +++ b/setuptools/package_index.py @@ -1,6 +1,7 @@ """PyPI and direct package downloading.""" import sys +import subprocess import os import re import io @@ -585,7 +586,7 @@ class PackageIndex(Environment): scheme = URL_SCHEME(spec) if scheme: # It's a url, download it to tmpdir - found = self._download_url(scheme.group(1), spec, tmpdir) + found = self._download_url(spec, tmpdir) base, fragment = egg_info_for_url(spec) if base.endswith('.py'): found = self.gen_setup(found, fragment, tmpdir) @@ -814,7 +815,7 @@ class PackageIndex(Environment): else: raise DistutilsError("Download error for %s: %s" % (url, v)) from v - def _download_url(self, scheme, url, tmpdir): + def _download_url(self, url, tmpdir): # Determine download filename # name, fragment = egg_info_for_url(url) @@ -829,19 +830,59 @@ class PackageIndex(Environment): filename = os.path.join(tmpdir, name) - # Download the file - # - if scheme == 'svn' or scheme.startswith('svn+'): - return self._download_svn(url, filename) - elif scheme == 'git' or scheme.startswith('git+'): - return self._download_git(url, filename) - elif scheme.startswith('hg+'): - return self._download_hg(url, filename) - elif scheme == 'file': - return urllib.request.url2pathname(urllib.parse.urlparse(url)[2]) - else: - self.url_ok(url, True) # raises error if not allowed - return self._attempt_download(url, filename) + return self._download_vcs(url, filename) or self._download_other(url, filename) + + @staticmethod + def _resolve_vcs(url): + """ + >>> rvcs = PackageIndex._resolve_vcs + >>> rvcs('git+http://foo/bar') + 'git' + >>> rvcs('hg+https://foo/bar') + 'hg' + >>> rvcs('git:myhost') + 'git' + >>> rvcs('hg:myhost') + >>> rvcs('http://foo/bar') + """ + scheme = urllib.parse.urlsplit(url).scheme + pre, sep, post = scheme.partition('+') + # svn and git have their own protocol; hg does not + allowed = set(['svn', 'git'] + ['hg'] * bool(sep)) + return next(iter({pre} & allowed), None) + + def _download_vcs(self, url, spec_filename): + vcs = self._resolve_vcs(url) + if not vcs: + return + if vcs == 'svn': + raise DistutilsError( + f"Invalid config, SVN download is not supported: {url}" + ) + + filename, _, _ = spec_filename.partition('#') + url, rev = self._vcs_split_rev_from_url(url) + + self.info(f"Doing {vcs} clone from {url} to {filename}") + subprocess.check_call([vcs, 'clone', '--quiet', url, filename]) + + co_commands = dict( + git=[vcs, '-C', filename, 'checkout', '--quiet', rev], + hg=[vcs, '--cwd', filename, 'up', '-C', '-r', rev, '-q'], + ) + if rev is not None: + self.info(f"Checking out {rev}") + subprocess.check_call(co_commands[vcs]) + + return filename + + def _download_other(self, url, filename): + scheme = urllib.parse.urlsplit(url).scheme + if scheme == 'file': # pragma: no cover + return urllib.request.url2pathname(urllib.parse.urlparse(url).path) + # raise error if not allowed + self.url_ok(url, True) + return self._attempt_download(url, filename) def scan_url(self, url): self.process_url(url, True) @@ -857,64 +898,36 @@ class PackageIndex(Environment): os.unlink(filename) raise DistutilsError(f"Unexpected HTML page found at {url}") - def _download_svn(self, url, _filename): - raise DistutilsError(f"Invalid config, SVN download is not supported: {url}") - @staticmethod - def _vcs_split_rev_from_url(url, pop_prefix=False): - scheme, netloc, path, query, frag = urllib.parse.urlsplit(url) + def _vcs_split_rev_from_url(url): + """ + Given a possible VCS URL, return a clean URL and resolved revision if any. + >>> vsrfu = PackageIndex._vcs_split_rev_from_url + >>> vsrfu('git+https://github.com/pypa/setuptools@v69.0.0#egg-info=setuptools') + ('https://github.com/pypa/setuptools', 'v69.0.0') + >>> vsrfu('git+https://github.com/pypa/setuptools#egg-info=setuptools') + ('https://github.com/pypa/setuptools', None) + >>> vsrfu('http://foo/bar') + ('http://foo/bar', None) + """ + parts = urllib.parse.urlsplit(url) - scheme = scheme.split('+', 1)[-1] + clean_scheme = parts.scheme.split('+', 1)[-1] # Some fragment identification fails - path = path.split('#', 1)[0] - - rev = None - if '@' in path: - path, rev = path.rsplit('@', 1) - - # Also, discard fragment - url = urllib.parse.urlunsplit((scheme, netloc, path, query, '')) - - return url, rev - - def _download_git(self, url, filename): - filename = filename.split('#', 1)[0] - url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) - - self.info("Doing git clone from %s to %s", url, filename) - os.system("git clone --quiet %s %s" % (url, filename)) - - if rev is not None: - self.info("Checking out %s", rev) - os.system( - "git -C %s checkout --quiet %s" - % ( - filename, - rev, - ) - ) + no_fragment_path, _, _ = parts.path.partition('#') - return filename + pre, sep, post = no_fragment_path.rpartition('@') + clean_path, rev = (pre, post) if sep else (post, None) - def _download_hg(self, url, filename): - filename = filename.split('#', 1)[0] - url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True) + resolved = parts._replace( + scheme=clean_scheme, + path=clean_path, + # discard the fragment + fragment='', + ).geturl() - self.info("Doing hg clone from %s to %s", url, filename) - os.system("hg clone --quiet %s %s" % (url, filename)) - - if rev is not None: - self.info("Updating to %s", rev) - os.system( - "hg --cwd %s up -C -r %s -q" - % ( - filename, - rev, - ) - ) - - return filename + return resolved, rev def debug(self, msg, *args): log.debug(msg, *args) diff --git a/setuptools/tests/test_packageindex.py b/setuptools/tests/test_packageindex.py index 41b9661..e4cd91a 100644 --- a/setuptools/tests/test_packageindex.py +++ b/setuptools/tests/test_packageindex.py @@ -2,7 +2,6 @@ import distutils.errors import urllib.request import urllib.error import http.client -from unittest import mock import pytest @@ -171,49 +170,46 @@ class TestPackageIndex: assert dists[0].version == '' assert dists[1].version == vc - def test_download_git_with_rev(self, tmpdir): + def test_download_git_with_rev(self, tmp_path, fp): url = 'git+https://github.example/group/project@master#egg=foo' index = setuptools.package_index.PackageIndex() - with mock.patch("os.system") as os_system_mock: - result = index.download(url, str(tmpdir)) + expected_dir = tmp_path / 'project@master' + fp.register([ + 'git', + 'clone', + '--quiet', + 'https://github.example/group/project', + expected_dir, + ]) + fp.register(['git', '-C', expected_dir, 'checkout', '--quiet', 'master']) - os_system_mock.assert_called() + result = index.download(url, tmp_path) - expected_dir = str(tmpdir / 'project@master') - expected = ( - 'git clone --quiet ' 'https://github.example/group/project {expected_dir}' - ).format(**locals()) - first_call_args = os_system_mock.call_args_list[0][0] - assert first_call_args == (expected,) + assert result == str(expected_dir) + assert len(fp.calls) == 2 - tmpl = 'git -C {expected_dir} checkout --quiet master' - expected = tmpl.format(**locals()) - assert os_system_mock.call_args_list[1][0] == (expected,) - assert result == expected_dir - - def test_download_git_no_rev(self, tmpdir): + def test_download_git_no_rev(self, tmp_path, fp): url = 'git+https://github.example/group/project#egg=foo' index = setuptools.package_index.PackageIndex() - with mock.patch("os.system") as os_system_mock: - result = index.download(url, str(tmpdir)) - - os_system_mock.assert_called() - - expected_dir = str(tmpdir / 'project') - expected = ( - 'git clone --quiet ' 'https://github.example/group/project {expected_dir}' - ).format(**locals()) - os_system_mock.assert_called_once_with(expected) - - def test_download_svn(self, tmpdir): + expected_dir = tmp_path / 'project' + fp.register([ + 'git', + 'clone', + '--quiet', + 'https://github.example/group/project', + expected_dir, + ]) + index.download(url, tmp_path) + + def test_download_svn(self, tmp_path): url = 'svn+https://svn.example/project#egg=foo' index = setuptools.package_index.PackageIndex() msg = r".*SVN download is not supported.*" with pytest.raises(distutils.errors.DistutilsError, match=msg): - index.download(url, str(tmpdir)) + index.download(url, tmp_path) class TestContentCheckers: -- 2.40.0