1 files changed, 70 insertions, 38 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index fbfa6938ac..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
-    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
-    # with the standard wget/urllib User-Agent, so pretend to be a modern
-    # browser.
-    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
    def check_certs(self, d):
        """
        Should certificates be checked?
@@ -83,11 +78,11 @@ class Wget(FetchMethod):
        else:
            ud.basename = os.path.basename(ud.path)
-        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+        ud.localfile = ud.basename
        if not ud.localfile:
-            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
+            ud.localfile = ud.host + ud.path.replace("/", ".")
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
        if ud.type == 'ftp' or ud.type == 'ftps':
            self.basecmd += " --passive-ftp"
@@ -101,16 +96,17 @@ class Wget(FetchMethod):
        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
-        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
+        runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
    def download(self, ud, d):
        """Fetch urls"""
        fetchcmd = self.basecmd
-        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        dldir = os.path.realpath(d.getVar("DL_DIR"))
+        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
-        fetchcmd += " -O %s" % shlex.quote(localpath)
+        fetchcmd += " --output-document=%s" % shlex.quote(localpath)
        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
@@ -126,14 +122,18 @@ class Wget(FetchMethod):
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
        uri = ud.url.split(";")[0]
-        if os.path.exists(ud.localpath):
+        fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
-            # file exists, but we didnt complete it.. trying again..
-            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
-        else:
-            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
        self._runwget(ud, d, fetchcmd, False)
+        # Sanity check since wget can pretend it succeed when it didn't
+        # Also, this used to happen if sourceforge sent us to the mirror page
+        if not os.path.exists(localpath):
+            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
+        if os.path.getsize(localpath) == 0:
+            os.remove(localpath)
+            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
@@ -143,15 +143,6 @@ class Wget(FetchMethod):
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])
-        # Sanity check since wget can pretend it succeed when it didn't
-        # Also, this used to happen if sourceforge sent us to the mirror page
-        if not os.path.exists(ud.localpath):
-            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
-        if os.path.getsize(ud.localpath) == 0:
-            os.remove(ud.localpath)
-            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
        return True
    def checkstatus(self, fetch, ud, d, try_again=True):
@@ -243,7 +234,12 @@ class Wget(FetchMethod):
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
-                    r = h.getresponse()
+                    try:
+                        r = h.getresponse()
+                    except TimeoutError as e:
+                        if fetch.connection_cache:
+                            fetch.connection_cache.remove_connection(h.host, h.port)
+                        raise TimeoutError(e)
                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.
@@ -304,13 +300,45 @@ class Wget(FetchMethod):
        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
-            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
+            urllib2.HTTPRedirectHandler before 3.13 has two flaws:
-            when we want to follow redirects using the original method.
+            
+            It resets the method to GET on redirect when we want to follow
+            redirects using the original method (typically HEAD). This was fixed
+            in 759e8e7.
+            It also doesn't handle 308 (Permanent Redirect). This was fixed in
+            c379bc5.
+            Until we depend on Python 3.13 onwards, copy the redirect_request
+            method to fix these issues.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
-                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
+                m = req.get_method()
-                newreq.get_method = req.get_method
+                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
-                return newreq
+                    or code in (301, 302, 303) and m == "POST")):
+                    raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib.request, in this case).  In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+                # Be conciliant with URIs containing a space.  This is mainly
+                # redundant with the more complete encoding done in http_error_302(),
+                # but it is kept for compatibility with other callers.
+                newurl = newurl.replace(' ', '%20')
+                CONTENT_HEADERS = ("content-length", "content-type")
+                newheaders = {k: v for k, v in req.headers.items()
+                            if k.lower() not in CONTENT_HEADERS}
+                return urllib.request.Request(newurl,
+                            method="HEAD" if m == "HEAD" else "GET",
+                            headers=newheaders,
+                            origin_req_host=req.origin_req_host,
+                            unverifiable=True)
+            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
@@ -343,14 +371,14 @@ class Wget(FetchMethod):
            opener = urllib.request.build_opener(*handlers)
            try:
-                uri_base = ud.url.split(";")[0]
+                parts = urllib.parse.urlparse(ud.url.split(";")[0])
-                uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+                uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
-                r.add_header("User-Agent", self.user_agent)
+                r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
@@ -370,7 +398,7 @@ class Wget(FetchMethod):
                except (FileNotFoundError, netrc.NetrcParseError):
                    pass
-                with opener.open(r, timeout=30) as response:
+                with opener.open(r, timeout=100) as response:
                    pass
            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
                if try_again:
@@ -457,7 +485,7 @@ class Wget(FetchMethod):
        f = tempfile.NamedTemporaryFile()
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
-            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
+            fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
@@ -617,13 +645,17 @@ class Wget(FetchMethod):
        sanity check to ensure same name and type.
        """
-        package = ud.path.split("/")[-1]
+        if 'downloadfilename' in ud.parm:
+            package = ud.parm['downloadfilename']
+        else:
+            package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']
        """possible to have no version in pkg name, such as spectrum-fw"""
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
+            bb.debug(3, "latest_versionstring: no version found in %s" % package)
            return (current_version[1], '')
        package_regex = self._init_regexes(package, ud, d)

diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py index fbfa6938ac..7e43d3bc97 100644 --- a/bitbake/lib/bb/fetch2/wget.py +++ b/bitbake/lib/bb/fetch2/wget.py
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
53	class Wget(FetchMethod):	53	class Wget(FetchMethod):
54	"""Class to fetch urls via 'wget'"""	54	"""Class to fetch urls via 'wget'"""
55		55
56	# CDNs like CloudFlare may do a 'browser integrity test' which can fail
57	# with the standard wget/urllib User-Agent, so pretend to be a modern
58	# browser.
59	user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
60
61	def check_certs(self, d):	56	def check_certs(self, d):
62	"""	57	"""
63	Should certificates be checked?	58	Should certificates be checked?
@@ -83,11 +78,11 @@ class Wget(FetchMethod):
83	else:	78	else:
84	ud.basename = os.path.basename(ud.path)	79	ud.basename = os.path.basename(ud.path)
85		80
86	ud.localfile = d.expand(urllib.parse.unquote(ud.basename))	81	ud.localfile = ud.basename
87	if not ud.localfile:	82	if not ud.localfile:
88	ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))	83	ud.localfile = ud.host + ud.path.replace("/", ".")
89		84
90	self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"	85	self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
91		86
92	if ud.type == 'ftp' or ud.type == 'ftps':	87	if ud.type == 'ftp' or ud.type == 'ftps':
93	self.basecmd += " --passive-ftp"	88	self.basecmd += " --passive-ftp"
@@ -101,16 +96,17 @@ class Wget(FetchMethod):
101		96
102	logger.debug2("Fetching %s using command '%s'" % (ud.url, command))	97	logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
103	bb.fetch2.check_network_access(d, command, ud.url)	98	bb.fetch2.check_network_access(d, command, ud.url)
104	runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)	99	runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
105		100
106	def download(self, ud, d):	101	def download(self, ud, d):
107	"""Fetch urls"""	102	"""Fetch urls"""
108		103
109	fetchcmd = self.basecmd	104	fetchcmd = self.basecmd
110		105
111	localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"	106	dldir = os.path.realpath(d.getVar("DL_DIR"))
		107	localpath = os.path.join(dldir, ud.localfile) + ".tmp"
112	bb.utils.mkdirhier(os.path.dirname(localpath))	108	bb.utils.mkdirhier(os.path.dirname(localpath))
113	fetchcmd += " -O %s" % shlex.quote(localpath)	109	fetchcmd += " --output-document=%s" % shlex.quote(localpath)
114		110
115	if ud.user and ud.pswd:	111	if ud.user and ud.pswd:
116	fetchcmd += " --auth-no-challenge"	112	fetchcmd += " --auth-no-challenge"
@@ -126,14 +122,18 @@ class Wget(FetchMethod):
126	fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)	122	fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
127		123
128	uri = ud.url.split(";")[0]	124	uri = ud.url.split(";")[0]
129	if os.path.exists(ud.localpath):	125	fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
130	# file exists, but we didnt complete it.. trying again..
131	fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
132	else:
133	fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
134
135	self._runwget(ud, d, fetchcmd, False)	126	self._runwget(ud, d, fetchcmd, False)
136		127
		128	# Sanity check since wget can pretend it succeed when it didn't
		129	# Also, this used to happen if sourceforge sent us to the mirror page
		130	if not os.path.exists(localpath):
		131	raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
		132
		133	if os.path.getsize(localpath) == 0:
		134	os.remove(localpath)
		135	raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
		136
137	# Try and verify any checksum now, meaning if it isn't correct, we don't remove the	137	# Try and verify any checksum now, meaning if it isn't correct, we don't remove the
138	# original file, which might be a race (imagine two recipes referencing the same	138	# original file, which might be a race (imagine two recipes referencing the same
139	# source, one with an incorrect checksum)	139	# source, one with an incorrect checksum)
@@ -143,15 +143,6 @@ class Wget(FetchMethod):
143	# Our lock prevents multiple writers but mirroring code may grab incomplete files	143	# Our lock prevents multiple writers but mirroring code may grab incomplete files
144	os.rename(localpath, localpath[:-4])	144	os.rename(localpath, localpath[:-4])
145		145
146	# Sanity check since wget can pretend it succeed when it didn't
147	# Also, this used to happen if sourceforge sent us to the mirror page
148	if not os.path.exists(ud.localpath):
149	raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
150
151	if os.path.getsize(ud.localpath) == 0:
152	os.remove(ud.localpath)
153	raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
154
155	return True	146	return True
156		147
157	def checkstatus(self, fetch, ud, d, try_again=True):	148	def checkstatus(self, fetch, ud, d, try_again=True):
@@ -243,7 +234,12 @@ class Wget(FetchMethod):
243	fetch.connection_cache.remove_connection(h.host, h.port)	234	fetch.connection_cache.remove_connection(h.host, h.port)
244	raise urllib.error.URLError(err)	235	raise urllib.error.URLError(err)
245	else:	236	else:
246	r = h.getresponse()	237	try:
		238	r = h.getresponse()
		239	except TimeoutError as e:
		240	if fetch.connection_cache:
		241	fetch.connection_cache.remove_connection(h.host, h.port)
		242	raise TimeoutError(e)
247		243
248	# Pick apart the HTTPResponse object to get the addinfourl	244	# Pick apart the HTTPResponse object to get the addinfourl
249	# object initialized properly.	245	# object initialized properly.
@@ -304,13 +300,45 @@ class Wget(FetchMethod):
304		300
305	class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):	301	class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
306	"""	302	"""
307	urllib2.HTTPRedirectHandler resets the method to GET on redirect,	303	urllib2.HTTPRedirectHandler before 3.13 has two flaws:
308	when we want to follow redirects using the original method.	304
		305	It resets the method to GET on redirect when we want to follow
		306	redirects using the original method (typically HEAD). This was fixed
		307	in 759e8e7.
		308
		309	It also doesn't handle 308 (Permanent Redirect). This was fixed in
		310	c379bc5.
		311
		312	Until we depend on Python 3.13 onwards, copy the redirect_request
		313	method to fix these issues.
309	"""	314	"""
310	def redirect_request(self, req, fp, code, msg, headers, newurl):	315	def redirect_request(self, req, fp, code, msg, headers, newurl):
311	newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)	316	m = req.get_method()
312	newreq.get_method = req.get_method	317	if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
313	return newreq	318	or code in (301, 302, 303) and m == "POST")):
		319	raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
		320
		321	# Strictly (according to RFC 2616), 301 or 302 in response to
		322	# a POST MUST NOT cause a redirection without confirmation
		323	# from the user (of urllib.request, in this case). In practice,
		324	# essentially all clients do redirect in this case, so we do
		325	# the same.
		326
		327	# Be conciliant with URIs containing a space. This is mainly
		328	# redundant with the more complete encoding done in http_error_302(),
		329	# but it is kept for compatibility with other callers.
		330	newurl = newurl.replace(' ', '%20')
		331
		332	CONTENT_HEADERS = ("content-length", "content-type")
		333	newheaders = {k: v for k, v in req.headers.items()
		334	if k.lower() not in CONTENT_HEADERS}
		335	return urllib.request.Request(newurl,
		336	method="HEAD" if m == "HEAD" else "GET",
		337	headers=newheaders,
		338	origin_req_host=req.origin_req_host,
		339	unverifiable=True)
		340
		341	http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
314		342
315	# We need to update the environment here as both the proxy and HTTPS	343	# We need to update the environment here as both the proxy and HTTPS
316	# handlers need variables set. The proxy needs http_proxy and friends to	344	# handlers need variables set. The proxy needs http_proxy and friends to
@@ -343,14 +371,14 @@ class Wget(FetchMethod):
343	opener = urllib.request.build_opener(*handlers)	371	opener = urllib.request.build_opener(*handlers)
344		372
345	try:	373	try:
346	uri_base = ud.url.split(";")[0]	374	parts = urllib.parse.urlparse(ud.url.split(";")[0])
347	uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)	375	uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
348	r = urllib.request.Request(uri)	376	r = urllib.request.Request(uri)
349	r.get_method = lambda: "HEAD"	377	r.get_method = lambda: "HEAD"
350	# Some servers (FusionForge, as used on Alioth) require that the	378	# Some servers (FusionForge, as used on Alioth) require that the
351	# optional Accept header is set.	379	# optional Accept header is set.
352	r.add_header("Accept", "*/*")	380	r.add_header("Accept", "*/*")
353	r.add_header("User-Agent", self.user_agent)	381	r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
354	def add_basic_auth(login_str, request):	382	def add_basic_auth(login_str, request):
355	'''Adds Basic auth to http request, pass in login:password as string'''	383	'''Adds Basic auth to http request, pass in login:password as string'''
356	import base64	384	import base64
@@ -370,7 +398,7 @@ class Wget(FetchMethod):
370	except (FileNotFoundError, netrc.NetrcParseError):	398	except (FileNotFoundError, netrc.NetrcParseError):
371	pass	399	pass
372		400
373	with opener.open(r, timeout=30) as response:	401	with opener.open(r, timeout=100) as response:
374	pass	402	pass
375	except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:	403	except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
376	if try_again:	404	if try_again:
@@ -457,7 +485,7 @@ class Wget(FetchMethod):
457	f = tempfile.NamedTemporaryFile()	485	f = tempfile.NamedTemporaryFile()
458	with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:	486	with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
459	fetchcmd = self.basecmd	487	fetchcmd = self.basecmd
460	fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"	488	fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
461	try:	489	try:
462	self._runwget(ud, d, fetchcmd, True, workdir=workdir)	490	self._runwget(ud, d, fetchcmd, True, workdir=workdir)
463	fetchresult = f.read()	491	fetchresult = f.read()
@@ -617,13 +645,17 @@ class Wget(FetchMethod):
617		645
618	sanity check to ensure same name and type.	646	sanity check to ensure same name and type.
619	"""	647	"""
620	package = ud.path.split("/")[-1]	648	if 'downloadfilename' in ud.parm:
		649	package = ud.parm['downloadfilename']
		650	else:
		651	package = ud.path.split("/")[-1]
621	current_version = ['', d.getVar('PV'), '']	652	current_version = ['', d.getVar('PV'), '']
622		653
623	"""possible to have no version in pkg name, such as spectrum-fw"""	654	"""possible to have no version in pkg name, such as spectrum-fw"""
624	if not re.search(r"\d+", package):	655	if not re.search(r"\d+", package):
625	current_version[1] = re.sub('_', '.', current_version[1])	656	current_version[1] = re.sub('_', '.', current_version[1])
626	current_version[1] = re.sub('-', '.', current_version[1])	657	current_version[1] = re.sub('-', '.', current_version[1])
		658	bb.debug(3, "latest_versionstring: no version found in %s" % package)
627	return (current_version[1], '')	659	return (current_version[1], '')
628		660
629	package_regex = self._init_regexes(package, ud, d)	661	package_regex = self._init_regexes(package, ud, d)