Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--  bitbake/lib/bb/fetch2/wget.py  617
1 files changed, 0 insertions, 617 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
deleted file mode 100644
index 6d82f3af07..0000000000
--- a/bitbake/lib/bb/fetch2/wget.py
+++ /dev/null
@@ -1,617 +0,0 @@
1"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004 Chris Larson
10#
11# SPDX-License-Identifier: GPL-2.0-only
12#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
15import shlex
16import re
17import tempfile
18import os
19import errno
20import bb
21import bb.progress
22import socket
23import http.client
24import urllib.request, urllib.parse, urllib.error
25from bb.fetch2 import FetchMethod
26from bb.fetch2 import FetchError
27from bb.fetch2 import logger
28from bb.fetch2 import runfetchcmd
29from bb.utils import export_proxies
30from bs4 import BeautifulSoup
31from bs4 import SoupStrainer
32
33class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
34 """
35 Extract progress information from wget output.
36 Note: relies on --progress=dot (with -v or without -q/-nv) being
37 specified on the wget command line.
38 """
39 def __init__(self, d):
40 super(WgetProgressHandler, self).__init__(d)
41 # Send an initial progress event so the bar gets shown
42 self._fire_progress(0)
43
44 def writeline(self, line):
45 percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
46 if percs:
47 progress = int(percs[-1][0])
48 rate = percs[-1][1] + '/s'
49 self.update(progress, rate)
50 return False
51 return True
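# Illustration (not part of the original file): with "--progress=dot -v" wget
# prints dot-progress lines roughly like the one below; the regex above picks
# out the last "<percent>% <rate>" pair, here progress=96 and rate "3.34M/s".
#
#    51200K .......... .......... .......... .......... ..........  96% 3.34M 0s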
52
53
54class Wget(FetchMethod):
55
56 # CDNs like CloudFlare may do a 'browser integrity test' which can fail
57 # with the standard wget/urllib User-Agent, so pretend to be a modern
58 # browser.
59 user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
60
61 """Class to fetch urls via 'wget'"""
62 def supports(self, ud, d):
63 """
64 Check to see if a given url can be fetched with wget.
65 """
66 return ud.type in ['http', 'https', 'ftp']
67
68 def recommends_checksum(self, urldata):
69 return True
70
71 def urldata_init(self, ud, d):
72 if 'protocol' in ud.parm:
73 if ud.parm['protocol'] == 'git':
74 raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
75
76 if 'downloadfilename' in ud.parm:
77 ud.basename = ud.parm['downloadfilename']
78 else:
79 ud.basename = os.path.basename(ud.path)
80
81 ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
82 if not ud.localfile:
83 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
84
85 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
86
87 def _runwget(self, ud, d, command, quiet, workdir=None):
88
89 progresshandler = WgetProgressHandler(d)
90
91 logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
92 bb.fetch2.check_network_access(d, command, ud.url)
93 runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
94
95 def download(self, ud, d):
96 """Fetch urls"""
97
98 fetchcmd = self.basecmd
99
100 if 'downloadfilename' in ud.parm:
101 localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
102 bb.utils.mkdirhier(os.path.dirname(localpath))
103 fetchcmd += " -O %s" % shlex.quote(localpath)
104
105 if ud.user and ud.pswd:
106 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
107
108 uri = ud.url.split(";")[0]
109 if os.path.exists(ud.localpath):
110 # file exists, but we didn't complete it; trying again
111 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
112 else:
113 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
114
115 self._runwget(ud, d, fetchcmd, False)
116
117 # Sanity check since wget can pretend it succeeded when it didn't
118 # Also, this used to happen if sourceforge sent us to the mirror page
119 if not os.path.exists(ud.localpath):
120 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
121
122 if os.path.getsize(ud.localpath) == 0:
123 os.remove(ud.localpath)
124 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
125
126 return True
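# Illustrative example (assumed DL_DIR and URL, not part of the original file):
# with DL_DIR=/srv/downloads and the url https://example.com/foo-1.0.tar.gz,
# the command handed to _runwget() resembles
#
#   /usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate \
#       -P /srv/downloads 'https://example.com/foo-1.0.tar.gz'
#
# and _runwget() appends " --progress=dot -v" before running it.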
127
128 def checkstatus(self, fetch, ud, d, try_again=True):
129 class HTTPConnectionCache(http.client.HTTPConnection):
130 if fetch.connection_cache:
131 def connect(self):
132 """Connect to the host and port specified in __init__."""
133
134 sock = fetch.connection_cache.get_connection(self.host, self.port)
135 if sock:
136 self.sock = sock
137 else:
138 self.sock = socket.create_connection((self.host, self.port),
139 self.timeout, self.source_address)
140 fetch.connection_cache.add_connection(self.host, self.port, self.sock)
141
142 if self._tunnel_host:
143 self._tunnel()
144
145 class CacheHTTPHandler(urllib.request.HTTPHandler):
146 def http_open(self, req):
147 return self.do_open(HTTPConnectionCache, req)
148
149 def do_open(self, http_class, req):
150 """Return an addinfourl object for the request, using http_class.
151
152 http_class must implement the HTTPConnection API from httplib.
153 The addinfourl return value is a file-like object. It also
154 has methods and attributes including:
155 - info(): return a mimetools.Message object for the headers
156 - geturl(): return the original request URL
157 - code: HTTP status code
158 """
159 host = req.host
160 if not host:
161 raise urllib.error.URLError('no host given')
162
163 h = http_class(host, timeout=req.timeout) # will parse host:port
164 h.set_debuglevel(self._debuglevel)
165
166 headers = dict(req.unredirected_hdrs)
167 headers.update(dict((k, v) for k, v in list(req.headers.items())
168 if k not in headers))
169
170 # We want to make an HTTP/1.1 request, but the addinfourl
171 # class isn't prepared to deal with a persistent connection.
172 # It will try to read all remaining data from the socket,
173 # which will block while the server waits for the next request.
174 # So make sure the connection gets closed after the (only)
175 # request.
176
177 # Don't close the connection when connection_cache is enabled.
178 if fetch.connection_cache is None:
179 headers["Connection"] = "close"
180 else:
181 headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
182
183 headers = dict(
184 (name.title(), val) for name, val in list(headers.items()))
185
186 if req._tunnel_host:
187 tunnel_headers = {}
188 proxy_auth_hdr = "Proxy-Authorization"
189 if proxy_auth_hdr in headers:
190 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
191 # Proxy-Authorization should not be sent to origin
192 # server.
193 del headers[proxy_auth_hdr]
194 h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
195
196 try:
197 h.request(req.get_method(), req.selector, req.data, headers)
198 except socket.error as err: # XXX what error?
199 # Don't close connection when cache is enabled.
200 # Instead, try to detect connections that are no longer
201 # usable (for example, closed unexpectedly) and remove
202 # them from the cache.
203 if fetch.connection_cache is None:
204 h.close()
205 elif isinstance(err, OSError) and err.errno == errno.EBADF:
206 # This happens when the server closes the connection despite the Keep-Alive.
207 # Apparently urllib then uses the file descriptor, expecting it to be
208 # connected, when in reality the connection is already gone.
209 # We let the request fail and expect it to be
210 # tried once more ("try_again" in check_status()),
211 # with the dead connection removed from the cache.
212 # If it still fails, we give up, which can happen with bad
213 # HTTP proxy settings.
214 fetch.connection_cache.remove_connection(h.host, h.port)
215 raise urllib.error.URLError(err)
216 else:
217 r = h.getresponse()
218
219 # Pick apart the HTTPResponse object to get the addinfourl
220 # object initialized properly.
221
222 # Wrap the HTTPResponse object in socket's file object adapter
223 # for Windows. That adapter calls recv(), so delegate recv()
224 # to read(). This weird wrapping allows the returned object to
225 # have readline() and readlines() methods.
226
227 # XXX It might be better to extract the read buffering code
228 # out of socket._fileobject() and into a base class.
229 r.recv = r.read
230
231 # no data, just have to read
232 r.read()
233 class fp_dummy(object):
234 def read(self):
235 return ""
236 def readline(self):
237 return ""
238 def close(self):
239 pass
240 closed = False
241
242 resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
243 resp.code = r.status
244 resp.msg = r.reason
245
246 # Close the connection when the server requests it.
247 if fetch.connection_cache is not None:
248 if 'Connection' in r.msg and r.msg['Connection'] == 'close':
249 fetch.connection_cache.remove_connection(h.host, h.port)
250
251 return resp
252
253 class HTTPMethodFallback(urllib.request.BaseHandler):
254 """
255 Fallback to GET if HEAD is not allowed (405 HTTP error)
256 """
257 def http_error_405(self, req, fp, code, msg, headers):
258 fp.read()
259 fp.close()
260
261 if req.get_method() != 'GET':
262 newheaders = dict((k, v) for k, v in list(req.headers.items())
263 if k.lower() not in ("content-length", "content-type"))
264 return self.parent.open(urllib.request.Request(req.get_full_url(),
265 headers=newheaders,
266 origin_req_host=req.origin_req_host,
267 unverifiable=True))
268
269 raise urllib.error.HTTPError(req, code, msg, headers, None)
270
271 # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
272 # Forbidden when they actually mean 405 Method Not Allowed.
273 http_error_403 = http_error_405
274
275
276 class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
277 """
278 urllib.request.HTTPRedirectHandler resets the method to GET on redirect,
279 whereas we want to follow redirects using the original method.
280 """
281 def redirect_request(self, req, fp, code, msg, headers, newurl):
282 newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
283 newreq.get_method = req.get_method
284 return newreq
285 exported_proxies = export_proxies(d)
286
287 handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
288 if exported_proxies:
289 handlers.append(urllib.request.ProxyHandler())
290 handlers.append(CacheHTTPHandler())
291 # Since Python 2.7.9 ssl cert validation is enabled by default
292 # (see PEP-0476); this causes verification errors on some https servers,
293 # so disable it by default.
294 import ssl
295 if hasattr(ssl, '_create_unverified_context'):
296 handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
297 opener = urllib.request.build_opener(*handlers)
298
299 try:
300 uri = ud.url.split(";")[0]
301 r = urllib.request.Request(uri)
302 r.get_method = lambda: "HEAD"
303 # Some servers (FusionForge, as used on Alioth) require that the
304 # optional Accept header is set.
305 r.add_header("Accept", "*/*")
306 r.add_header("User-Agent", self.user_agent)
307 def add_basic_auth(login_str, request):
308 '''Adds Basic auth to http request, pass in login:password as string'''
309 import base64
310 encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
311 authheader = "Basic %s" % encodeuser
312 r.add_header("Authorization", authheader)
313
314 if ud.user and ud.pswd:
315 add_basic_auth(ud.user + ':' + ud.pswd, r)
316
317 try:
318 import netrc
319 n = netrc.netrc()
320 login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
321 add_basic_auth("%s:%s" % (login, password), r)
322 except (TypeError, ImportError, IOError, netrc.NetrcParseError):
323 pass
324
325 with opener.open(r) as response:
326 pass
327 except urllib.error.URLError as e:
328 if try_again:
329 logger.debug2("checkstatus: trying again")
330 return self.checkstatus(fetch, ud, d, False)
331 else:
332 # debug for now to avoid spamming the logs in e.g. remote sstate searches
333 logger.debug2("checkstatus() urlopen failed: %s" % e)
334 return False
335 except ConnectionResetError as e:
336 if try_again:
337 logger.debug2("checkstatus: trying again")
338 return self.checkstatus(fetch, ud, d, False)
339 else:
340 # debug for now to avoid spamming the logs in e.g. remote sstate searches
341 logger.debug2("checkstatus() urlopen failed: %s" % e)
342 return False
343 return True
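# Minimal standalone sketch of the HEAD probe performed above (assumed URL, no
# connection cache, proxy or auth handling; not part of the original file):
#
#   import urllib.request
#   req = urllib.request.Request("https://example.com/foo-1.0.tar.gz")
#   req.get_method = lambda: "HEAD"
#   req.add_header("Accept", "*/*")
#   req.add_header("User-Agent", Wget.user_agent)
#   with urllib.request.urlopen(req) as resp:
#       print(resp.status)  # 200 means the upstream file is reachable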
344
345 def _parse_path(self, regex, s):
346 """
347 Find and group name, version and archive type in the given string s
348 """
349
350 m = regex.search(s)
351 if m:
352 pname = ''
353 pver = ''
354 ptype = ''
355
356 mdict = m.groupdict()
357 if 'name' in mdict.keys():
358 pname = mdict['name']
359 if 'pver' in mdict.keys():
360 pver = mdict['pver']
361 if 'type' in mdict.keys():
362 ptype = mdict['type']
363
364 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
365
366 return (pname, pver, ptype)
367
368 return None
369
370 def _modelate_version(self, version):
371 if version[0] in ['.', '-']:
372 if version[1].isdigit():
373 version = version[1] + version[0] + version[2:len(version)]
374 else:
375 version = version[1:len(version)]
376
377 version = re.sub('-', '.', version)
378 version = re.sub('_', '.', version)
379 version = re.sub('(rc)+', '.1000.', version)
380 version = re.sub('(beta)+', '.100.', version)
381 version = re.sub('(alpha)+', '.10.', version)
382 if version[0] == 'v':
383 version = version[1:len(version)]
384 return version
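# Worked examples of the mangling above (illustrative, not part of the original
# file): "v2.5rc1" -> "2.5.1000.1" and "1.0beta3" -> "1.0.100.3", so that
# bb.utils.vercmp() can compare versions numerically, with rc (1000) ranking
# above beta (100) and alpha (10) for the same base version.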
385
386 def _vercmp(self, old, new):
387 """
388 Check whether the 'new' version is newer than the 'old' version. We use the existing vercmp() for this
389 purpose. PE is cleared in the comparison as it's not relevant to the build, and PR is cleared too
390 for simplicity, as it's somewhat difficult to derive from the various upstream formats
391 """
392
393 (oldpn, oldpv, oldsuffix) = old
394 (newpn, newpv, newsuffix) = new
395
396 # Check for a new suffix type that we have never heard of before
397 if newsuffix:
398 m = self.suffix_regex_comp.search(newsuffix)
399 if not m:
400 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
401 return False
402
403 # Not our package so ignore it
404 if oldpn != newpn:
405 return False
406
407 oldpv = self._modelate_version(oldpv)
408 newpv = self._modelate_version(newpv)
409
410 return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
411
412 def _fetch_index(self, uri, ud, d):
413 """
414 Run fetch checkstatus to get directory information
415 """
417 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
418 fetchcmd = self.basecmd
419 fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
420 try:
421 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
422 fetchresult = f.read()
423 except bb.fetch2.BBFetchException:
424 fetchresult = ""
425
426 return fetchresult
427
428 def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
429 """
430 Return the latest version of a package inside a given directory path
431 If error or no version, return ""
432 """
433 valid = 0
434 version = ['', '', '']
435
436 bb.debug(3, "VersionURL: %s" % (url))
437 soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
438 if not soup:
439 bb.debug(3, "*** %s NO SOUP" % (url))
440 return ""
441
442 for line in soup.find_all('a', href=True):
443 bb.debug(3, "line['href'] = '%s'" % (line['href']))
444 bb.debug(3, "line = '%s'" % (str(line)))
445
446 newver = self._parse_path(package_regex, line['href'])
447 if not newver:
448 newver = self._parse_path(package_regex, str(line))
449
450 if newver:
451 bb.debug(3, "Upstream version found: %s" % newver[1])
452 if valid == 0:
453 version = newver
454 valid = 1
455 elif self._vercmp(version, newver) < 0:
456 version = newver
457
458 pupver = re.sub('_', '.', version[1])
459
460 bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
461 (package, pupver or "N/A", current_version[1]))
462
463 if valid:
464 return pupver
465
466 return ""
467
468 def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
469 """
470 Scan every directory in order to get upstream version.
471 """
472 version_dir = ['', '', '']
473 version = ['', '', '']
474
475 dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
476 s = dirver_regex.search(dirver)
477 if s:
478 version_dir[1] = s.group('ver')
479 else:
480 version_dir[1] = dirver
481
482 dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
483 ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
484 bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
485
486 soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
487 if not soup:
488 return version[1]
489
490 for line in soup.find_all('a', href=True):
491 s = dirver_regex.search(line['href'].strip("/"))
492 if s:
493 sver = s.group('ver')
494
495 # When the prefix is part of the version directory, make sure that
496 # only the version directory is used, so strip any preceding
497 # directories if they exist.
498 #
499 # Example: with pfx = '/dir1/dir2/v' and version = '2.5', the expected
500 # result is v2.5.
501 spfx = s.group('pfx').split('/')[-1]
502
503 version_dir_new = ['', sver, '']
504 if self._vercmp(version_dir, version_dir_new) <= 0:
505 dirver_new = spfx + sver
506 path = ud.path.replace(dirver, dirver_new, True) \
507 .split(package)[0]
508 uri = bb.fetch.encodeurl([ud.type, ud.host, path,
509 ud.user, ud.pswd, {}])
510
511 pupver = self._check_latest_version(uri,
512 package, package_regex, current_version, ud, d)
513 if pupver:
514 version[1] = pupver
515
516 version_dir = version_dir_new
517
518 return version[1]
519
520 def _init_regexes(self, package, ud, d):
521 """
522 Match as many patterns as possible such as:
523 gnome-common-2.20.0.tar.gz (most common format)
524 gtk+-2.90.1.tar.gz
525 xf86-input-synaptics-12.6.9.tar.gz
526 dri2proto-2.3.tar.gz
527 blktool_4.orig.tar.gz
528 libid3tag-0.15.1b.tar.gz
529 unzip552.tar.gz
530 icu4c-3_6-src.tgz
531 genext2fs_1.3.orig.tar.gz
532 gst-fluendo-mp3
533 """
534 # match most patterns which use "-" as the separator before the version digits
535 pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
536 # a loose pattern such as for unzip552.tar.gz
537 pn_prefix2 = r"[a-zA-Z]+"
538 # a loose pattern such as for 80325-quicky-0.4.tar.gz
539 pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
540 # Save the Package Name (pn) Regex for use later
541 pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
542
543 # match version
544 pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
545
546 # match arch
547 parch_regex = "-source|_all_"
548
549 # The src.rpm extension was added only for the rpm package. It can be removed if rpm
550 # packages will always be considered as having to be manually upgraded
551 psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
552
553 # match name, version and archive type of a package
554 package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
555 % (pn_regex, pver_regex, parch_regex, psuffix_regex))
556 self.suffix_regex_comp = re.compile(psuffix_regex)
557
558 # compile the regex; it can be package-specific (UPSTREAM_CHECK_REGEX) or the generic one
559 pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
560 if pn_regex:
561 package_custom_regex_comp = re.compile(pn_regex)
562 else:
563 version = self._parse_path(package_regex_comp, package)
564 if version:
565 package_custom_regex_comp = re.compile(
566 r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
567 (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
568 else:
569 package_custom_regex_comp = None
570
571 return package_custom_regex_comp
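# Illustrative match (assumed filename, not part of the original file): for
# "gnome-common-2.20.0.tar.gz" the generic regex yields name "gnome-common-",
# pver "2.20.0" and type "tar.gz", which _parse_path() returns as a
# (name, version, type) tuple.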
572
573 def latest_versionstring(self, ud, d):
574 """
575 Manipulate the URL and try to obtain the latest package version
576
577 Sanity checks ensure the same name and type.
578 """
579 package = ud.path.split("/")[-1]
580 current_version = ['', d.getVar('PV'), '']
581
582 """possible to have no version in pkg name, such as spectrum-fw"""
583 if not re.search(r"\d+", package):
584 current_version[1] = re.sub('_', '.', current_version[1])
585 current_version[1] = re.sub('-', '.', current_version[1])
586 return (current_version[1], '')
587
588 package_regex = self._init_regexes(package, ud, d)
589 if package_regex is None:
590 bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
591 return ('', '')
592 bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
593
594 uri = ""
595 regex_uri = d.getVar("UPSTREAM_CHECK_URI")
596 if not regex_uri:
597 path = ud.path.split(package)[0]
598
599 # search for version matches on folders inside the path, like:
600 # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
601 dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
602 m = dirver_regex.search(path)
603 if m:
604 pn = d.getVar('PN')
605 dirver = m.group('dirver')
606
607 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
608 if not dirver_pn_regex.search(dirver):
609 return (self._check_latest_version_by_dir(dirver,
610 package, package_regex, current_version, ud, d), '')
611
612 uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
613 else:
614 uri = regex_uri
615
616 return (self._check_latest_version(uri, package, package_regex,
617 current_version, ud, d), '')
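# Example SRC_URI entries this fetcher would handle (illustrative values, not
# part of the original file):
#
#   SRC_URI = "https://example.com/releases/foo-1.2.3.tar.gz"
#   SRC_URI = "http://example.com/dl.php?id=42;downloadfilename=bar-2.0.tar.gz"
#
# The downloadfilename parameter maps to ud.parm['downloadfilename'] in
# urldata_init() and overrides the basename used for the local file.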