1 files changed, 73 insertions, 38 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index dc025800e6..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
-    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
-    # with the standard wget/urllib User-Agent, so pretend to be a modern
-    # browser.
-    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
    def check_certs(self, d):
        """
        Should certificates be checked?
@@ -83,11 +78,14 @@ class Wget(FetchMethod):
        else:
            ud.basename = os.path.basename(ud.path)
-        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+        ud.localfile = ud.basename
        if not ud.localfile:
-            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
+            ud.localfile = ud.host + ud.path.replace("/", ".")
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"
+        if ud.type == 'ftp' or ud.type == 'ftps':
+            self.basecmd += " --passive-ftp"
        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"
@@ -98,16 +96,17 @@ class Wget(FetchMethod):
        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
-        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
+        runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
    def download(self, ud, d):
        """Fetch urls"""
        fetchcmd = self.basecmd
-        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        dldir = os.path.realpath(d.getVar("DL_DIR"))
+        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
-        fetchcmd += " -O %s" % shlex.quote(localpath)
+        fetchcmd += " --output-document=%s" % shlex.quote(localpath)
        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
@@ -123,14 +122,18 @@ class Wget(FetchMethod):
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
        uri = ud.url.split(";")[0]
-        if os.path.exists(ud.localpath):
+        fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
-            # file exists, but we didnt complete it.. trying again..
-            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
-        else:
-            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
        self._runwget(ud, d, fetchcmd, False)
+        # Sanity check since wget can pretend it succeed when it didn't
+        # Also, this used to happen if sourceforge sent us to the mirror page
+        if not os.path.exists(localpath):
+            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
+        if os.path.getsize(localpath) == 0:
+            os.remove(localpath)
+            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
@@ -140,15 +143,6 @@ class Wget(FetchMethod):
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])
-        # Sanity check since wget can pretend it succeed when it didn't
-        # Also, this used to happen if sourceforge sent us to the mirror page
-        if not os.path.exists(ud.localpath):
-            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
-        if os.path.getsize(ud.localpath) == 0:
-            os.remove(ud.localpath)
-            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
        return True
    def checkstatus(self, fetch, ud, d, try_again=True):
@@ -240,7 +234,12 @@ class Wget(FetchMethod):
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
-                    r = h.getresponse()
+                    try:
+                        r = h.getresponse()
+                    except TimeoutError as e:
+                        if fetch.connection_cache:
+                            fetch.connection_cache.remove_connection(h.host, h.port)
+                        raise TimeoutError(e)
                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.
@@ -301,13 +300,45 @@ class Wget(FetchMethod):
        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
-            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
+            urllib2.HTTPRedirectHandler before 3.13 has two flaws:
-            when we want to follow redirects using the original method.
+            
+            It resets the method to GET on redirect when we want to follow
+            redirects using the original method (typically HEAD). This was fixed
+            in 759e8e7.
+            It also doesn't handle 308 (Permanent Redirect). This was fixed in
+            c379bc5.
+            Until we depend on Python 3.13 onwards, copy the redirect_request
+            method to fix these issues.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
+                """
+                Build the request used to follow a redirect to newurl.
+
+                A HEAD request stays HEAD; every other followed request is
+                re-issued as GET with the content-length/content-type headers
+                dropped. Raises urllib.error.HTTPError when the status code and
+                method combination is not one that is followed.
+                """
-                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
+                m = req.get_method()
-                newreq.get_method = req.get_method
+                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
-                return newreq
+                    or code in (301, 302, 303) and m == "POST")):
+                    # HTTPError lives in urllib.error; the urllib package itself
+                    # has no HTTPError attribute, so urllib.HTTPError would fail
+                    # with AttributeError instead of reporting the HTTP status.
+                    raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib.request, in this case).  In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+                # Be conciliant with URIs containing a space.  This is mainly
+                # redundant with the more complete encoding done in http_error_302(),
+                # but it is kept for compatibility with other callers.
+                newurl = newurl.replace(' ', '%20')
+                CONTENT_HEADERS = ("content-length", "content-type")
+                newheaders = {k: v for k, v in req.headers.items()
+                            if k.lower() not in CONTENT_HEADERS}
+                return urllib.request.Request(newurl,
+                            method="HEAD" if m == "HEAD" else "GET",
+                            headers=newheaders,
+                            origin_req_host=req.origin_req_host,
+                            unverifiable=True)
+            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
@@ -340,14 +371,14 @@ class Wget(FetchMethod):
            opener = urllib.request.build_opener(*handlers)
            try:
-                uri_base = ud.url.split(";")[0]
+                parts = urllib.parse.urlparse(ud.url.split(";")[0])
-                uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+                uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
-                r.add_header("User-Agent", self.user_agent)
+                r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
@@ -367,7 +398,7 @@ class Wget(FetchMethod):
                except (FileNotFoundError, netrc.NetrcParseError):
                    pass
-                with opener.open(r, timeout=30) as response:
+                with opener.open(r, timeout=100) as response:
                    pass
            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
                if try_again:
@@ -454,7 +485,7 @@ class Wget(FetchMethod):
        f = tempfile.NamedTemporaryFile()
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
-            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
+            fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
@@ -614,13 +645,17 @@ class Wget(FetchMethod):
        sanity check to ensure same name and type.
        """
-        package = ud.path.split("/")[-1]
+        if 'downloadfilename' in ud.parm:
+            package = ud.parm['downloadfilename']
+        else:
+            package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']
        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
+            bb.debug(3, "latest_versionstring: no version found in %s" % package)
            return (current_version[1], '')
        package_regex = self._init_regexes(package, ud, d)

diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index dc025800e6..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
53	class Wget(FetchMethod):	53	class Wget(FetchMethod):
54	"""Class to fetch urls via 'wget'"""	54	"""Class to fetch urls via 'wget'"""
55		55
56	# CDNs like CloudFlare may do a 'browser integrity test' which can fail
57	# with the standard wget/urllib User-Agent, so pretend to be a modern
58	# browser.
59	user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
60
61	def check_certs(self, d):	56	def check_certs(self, d):
62	"""	57	"""
63	Should certificates be checked?	58	Should certificates be checked?
@@ -83,11 +78,14 @@ class Wget(FetchMethod):
83	else:	78	else:
84	ud.basename = os.path.basename(ud.path)	79	ud.basename = os.path.basename(ud.path)
85		80
86	ud.localfile = d.expand(urllib.parse.unquote(ud.basename))	81	ud.localfile = ud.basename
87	if not ud.localfile:	82	if not ud.localfile:
88	ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))	83	ud.localfile = ud.host + ud.path.replace("/", ".")
		84
		85	self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
89		86
90	self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"	87	if ud.type == 'ftp' or ud.type == 'ftps':
		88	self.basecmd += " --passive-ftp"
91		89
92	if not self.check_certs(d):	90	if not self.check_certs(d):
93	self.basecmd += " --no-check-certificate"	91	self.basecmd += " --no-check-certificate"
@@ -98,16 +96,17 @@ class Wget(FetchMethod):
98		96
99	logger.debug2("Fetching %s using command '%s'" % (ud.url, command))	97	logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
100	bb.fetch2.check_network_access(d, command, ud.url)	98	bb.fetch2.check_network_access(d, command, ud.url)
101	runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)	99	runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
102		100
103	def download(self, ud, d):	101	def download(self, ud, d):
104	"""Fetch urls"""	102	"""Fetch urls"""
105		103
106	fetchcmd = self.basecmd	104	fetchcmd = self.basecmd
107		105
108	localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"	106	dldir = os.path.realpath(d.getVar("DL_DIR"))
		107	localpath = os.path.join(dldir, ud.localfile) + ".tmp"
109	bb.utils.mkdirhier(os.path.dirname(localpath))	108	bb.utils.mkdirhier(os.path.dirname(localpath))
110	fetchcmd += " -O %s" % shlex.quote(localpath)	109	fetchcmd += " --output-document=%s" % shlex.quote(localpath)
111		110
112	if ud.user and ud.pswd:	111	if ud.user and ud.pswd:
113	fetchcmd += " --auth-no-challenge"	112	fetchcmd += " --auth-no-challenge"
@@ -123,14 +122,18 @@ class Wget(FetchMethod):
123	fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)	122	fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
124		123
125	uri = ud.url.split(";")[0]	124	uri = ud.url.split(";")[0]
126	if os.path.exists(ud.localpath):	125	fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
127	# file exists, but we didnt complete it.. trying again..
128	fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
129	else:
130	fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
131
132	self._runwget(ud, d, fetchcmd, False)	126	self._runwget(ud, d, fetchcmd, False)
133		127
		128	# Sanity check since wget can pretend it succeed when it didn't
		129	# Also, this used to happen if sourceforge sent us to the mirror page
		130	if not os.path.exists(localpath):
		131	raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
		132
		133	if os.path.getsize(localpath) == 0:
		134	os.remove(localpath)
		135	raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
		136
134	# Try and verify any checksum now, meaning if it isn't correct, we don't remove the	137	# Try and verify any checksum now, meaning if it isn't correct, we don't remove the
135	# original file, which might be a race (imagine two recipes referencing the same	138	# original file, which might be a race (imagine two recipes referencing the same
136	# source, one with an incorrect checksum)	139	# source, one with an incorrect checksum)
@@ -140,15 +143,6 @@ class Wget(FetchMethod):
140	# Our lock prevents multiple writers but mirroring code may grab incomplete files	143	# Our lock prevents multiple writers but mirroring code may grab incomplete files
141	os.rename(localpath, localpath[:-4])	144	os.rename(localpath, localpath[:-4])
142		145
143	# Sanity check since wget can pretend it succeed when it didn't
144	# Also, this used to happen if sourceforge sent us to the mirror page
145	if not os.path.exists(ud.localpath):
146	raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
147
148	if os.path.getsize(ud.localpath) == 0:
149	os.remove(ud.localpath)
150	raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
151
152	return True	146	return True
153		147
154	def checkstatus(self, fetch, ud, d, try_again=True):	148	def checkstatus(self, fetch, ud, d, try_again=True):
@@ -240,7 +234,12 @@ class Wget(FetchMethod):
240	fetch.connection_cache.remove_connection(h.host, h.port)	234	fetch.connection_cache.remove_connection(h.host, h.port)
241	raise urllib.error.URLError(err)	235	raise urllib.error.URLError(err)
242	else:	236	else:
243	r = h.getresponse()	237	try:
		238	r = h.getresponse()
		239	except TimeoutError as e:
		240	if fetch.connection_cache:
		241	fetch.connection_cache.remove_connection(h.host, h.port)
		242	raise TimeoutError(e)
244		243
245	# Pick apart the HTTPResponse object to get the addinfourl	244	# Pick apart the HTTPResponse object to get the addinfourl
246	# object initialized properly.	245	# object initialized properly.
@@ -301,13 +300,45 @@ class Wget(FetchMethod):
301		300
302	class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):	301	class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
303	"""	302	"""
304	urllib2.HTTPRedirectHandler resets the method to GET on redirect,	303	urllib2.HTTPRedirectHandler before 3.13 has two flaws:
305	when we want to follow redirects using the original method.	304
		305	It resets the method to GET on redirect when we want to follow
		306	redirects using the original method (typically HEAD). This was fixed
		307	in 759e8e7.
		308
		309	It also doesn't handle 308 (Permanent Redirect). This was fixed in
		310	c379bc5.
		311
		312	Until we depend on Python 3.13 onwards, copy the redirect_request
		313	method to fix these issues.
306	"""	314	"""
307	def redirect_request(self, req, fp, code, msg, headers, newurl):	315	def redirect_request(self, req, fp, code, msg, headers, newurl):
308	newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)	316	m = req.get_method()
309	newreq.get_method = req.get_method	317	if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
310	return newreq	318	or code in (301, 302, 303) and m == "POST")):
		319	raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
		320
		321	# Strictly (according to RFC 2616), 301 or 302 in response to
		322	# a POST MUST NOT cause a redirection without confirmation
		323	# from the user (of urllib.request, in this case). In practice,
		324	# essentially all clients do redirect in this case, so we do
		325	# the same.
		326
		327	# Be conciliant with URIs containing a space. This is mainly
		328	# redundant with the more complete encoding done in http_error_302(),
		329	# but it is kept for compatibility with other callers.
		330	newurl = newurl.replace(' ', '%20')
		331
		332	CONTENT_HEADERS = ("content-length", "content-type")
		333	newheaders = {k: v for k, v in req.headers.items()
		334	if k.lower() not in CONTENT_HEADERS}
		335	return urllib.request.Request(newurl,
		336	method="HEAD" if m == "HEAD" else "GET",
		337	headers=newheaders,
		338	origin_req_host=req.origin_req_host,
		339	unverifiable=True)
		340
		341	http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
311		342
312	# We need to update the environment here as both the proxy and HTTPS	343	# We need to update the environment here as both the proxy and HTTPS
313	# handlers need variables set. The proxy needs http_proxy and friends to	344	# handlers need variables set. The proxy needs http_proxy and friends to
@@ -340,14 +371,14 @@ class Wget(FetchMethod):
340	opener = urllib.request.build_opener(*handlers)	371	opener = urllib.request.build_opener(*handlers)
341		372
342	try:	373	try:
343	uri_base = ud.url.split(";")[0]	374	parts = urllib.parse.urlparse(ud.url.split(";")[0])
344	uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)	375	uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
345	r = urllib.request.Request(uri)	376	r = urllib.request.Request(uri)
346	r.get_method = lambda: "HEAD"	377	r.get_method = lambda: "HEAD"
347	# Some servers (FusionForge, as used on Alioth) require that the	378	# Some servers (FusionForge, as used on Alioth) require that the
348	# optional Accept header is set.	379	# optional Accept header is set.
349	r.add_header("Accept", "*/*")	380	r.add_header("Accept", "*/*")
350	r.add_header("User-Agent", self.user_agent)	381	r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
351	def add_basic_auth(login_str, request):	382	def add_basic_auth(login_str, request):
352	'''Adds Basic auth to http request, pass in login:password as string'''	383	'''Adds Basic auth to http request, pass in login:password as string'''
353	import base64	384	import base64
@@ -367,7 +398,7 @@ class Wget(FetchMethod):
367	except (FileNotFoundError, netrc.NetrcParseError):	398	except (FileNotFoundError, netrc.NetrcParseError):
368	pass	399	pass
369		400
370	with opener.open(r, timeout=30) as response:	401	with opener.open(r, timeout=100) as response:
371	pass	402	pass
372	except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:	403	except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
373	if try_again:	404	if try_again:
@@ -454,7 +485,7 @@ class Wget(FetchMethod):
454	f = tempfile.NamedTemporaryFile()	485	f = tempfile.NamedTemporaryFile()
455	with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:	486	with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
456	fetchcmd = self.basecmd	487	fetchcmd = self.basecmd
457	fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"	488	fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
458	try:	489	try:
459	self._runwget(ud, d, fetchcmd, True, workdir=workdir)	490	self._runwget(ud, d, fetchcmd, True, workdir=workdir)
460	fetchresult = f.read()	491	fetchresult = f.read()
@@ -614,13 +645,17 @@ class Wget(FetchMethod):
614		645
615	sanity check to ensure same name and type.	646	sanity check to ensure same name and type.
616	"""	647	"""
617	package = ud.path.split("/")[-1]	648	if 'downloadfilename' in ud.parm:
		649	package = ud.parm['downloadfilename']
		650	else:
		651	package = ud.path.split("/")[-1]
618	current_version = ['', d.getVar('PV'), '']	652	current_version = ['', d.getVar('PV'), '']
619		653
620	"""possible to have no version in pkg name, such as spectrum-fw"""	654	"""possible to have no version in pkg name, such as spectrum-fw"""
621	if not re.search(r"\d+", package):	655	if not re.search(r"\d+", package):
622	current_version[1] = re.sub('_', '.', current_version[1])	656	current_version[1] = re.sub('_', '.', current_version[1])
623	current_version[1] = re.sub('-', '.', current_version[1])	657	current_version[1] = re.sub('-', '.', current_version[1])
		658	bb.debug(3, "latest_versionstring: no version found in %s" % package)
624	return (current_version[1], '')	659	return (current_version[1], '')
625		660
626	package_regex = self._init_regexes(package, ud, d)	661	package_regex = self._init_regexes(package, ud, d)