path: root/bitbake/lib/bb/fetch2/wget.py
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--  bitbake/lib/bb/fetch2/wget.py | 108
1 file changed, 70 insertions(+), 38 deletions(-)
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index fbfa6938ac..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 class Wget(FetchMethod):
     """Class to fetch urls via 'wget'"""
 
-    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
-    # with the standard wget/urllib User-Agent, so pretend to be a modern
-    # browser.
-    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
-
     def check_certs(self, d):
         """
         Should certificates be checked?
@@ -83,11 +78,11 @@ class Wget(FetchMethod):
         else:
             ud.basename = os.path.basename(ud.path)
 
-        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+        ud.localfile = ud.basename
         if not ud.localfile:
-            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
+            ud.localfile = ud.host + ud.path.replace("/", ".")
 
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
 
         if ud.type == 'ftp' or ud.type == 'ftps':
             self.basecmd += " --passive-ftp"
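The new default command uses long options and a longer timeout, but FETCHCMD_wget, when set, still replaces it wholesale. A hypothetical override through the datastore API (this assumes a BitBake environment where the bb module is importable; the command line shown is only an example, not a recommendation):

    import bb.data

    # Hypothetical override: any complete wget command line can be supplied here;
    # the fetcher appends its own per-download options to whatever it finds.
    d = bb.data.init()
    d.setVar("FETCHCMD_wget", "/usr/bin/env wget --tries=5 --timeout=30 --limit-rate=2m")
    print(d.getVar("FETCHCMD_wget"))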
@@ -101,16 +96,17 @@ class Wget(FetchMethod):
 
         logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
         bb.fetch2.check_network_access(d, command, ud.url)
-        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
+        runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
 
     def download(self, ud, d):
         """Fetch urls"""
 
         fetchcmd = self.basecmd
 
-        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        dldir = os.path.realpath(d.getVar("DL_DIR"))
+        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
         bb.utils.mkdirhier(os.path.dirname(localpath))
-        fetchcmd += " -O %s" % shlex.quote(localpath)
+        fetchcmd += " --output-document=%s" % shlex.quote(localpath)
 
         if ud.user and ud.pswd:
             fetchcmd += " --auth-no-challenge"
@@ -126,14 +122,18 @@ class Wget(FetchMethod):
                 fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
 
         uri = ud.url.split(";")[0]
-        if os.path.exists(ud.localpath):
-            # file exists, but we didnt complete it.. trying again..
-            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
-        else:
-            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
-
+        fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
         self._runwget(ud, d, fetchcmd, False)
 
+        # Sanity check since wget can pretend it succeed when it didn't
+        # Also, this used to happen if sourceforge sent us to the mirror page
+        if not os.path.exists(localpath):
+            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
+
+        if os.path.getsize(localpath) == 0:
+            os.remove(localpath)
+            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
+
         # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
         # original file, which might be a race (imagine two recipes referencing the same
         # source, one with an incorrect checksum)
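Taken together, the two hunks above make download() canonicalize DL_DIR, write to a single .tmp path via --output-document, always pass --continue, and verify the result before the later rename. A condensed sketch of that flow under stated assumptions: run stands in for runfetchcmd, and d/ud are duck-typed stand-ins for the datastore and FetchData objects:

    import os
    import shlex

    def download_sketch(d, ud, run):
        """Condensed sketch of the reworked download() flow (not the real method)."""
        basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
        dldir = os.path.realpath(d.getVar("DL_DIR"))
        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
        os.makedirs(os.path.dirname(localpath), exist_ok=True)

        uri = ud.url.split(";")[0]
        cmd = basecmd + " --output-document=%s" % shlex.quote(localpath)
        cmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
        run(cmd)

        # wget can exit 0 without leaving a usable file, so check before renaming.
        if not os.path.exists(localpath) or os.path.getsize(localpath) == 0:
            raise RuntimeError("fetch of %s produced no usable file" % uri)
        return localpath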
@@ -143,15 +143,6 @@ class Wget(FetchMethod):
         # Our lock prevents multiple writers but mirroring code may grab incomplete files
         os.rename(localpath, localpath[:-4])
 
-        # Sanity check since wget can pretend it succeed when it didn't
-        # Also, this used to happen if sourceforge sent us to the mirror page
-        if not os.path.exists(ud.localpath):
-            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
-
-        if os.path.getsize(ud.localpath) == 0:
-            os.remove(ud.localpath)
-            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
-
         return True
 
     def checkstatus(self, fetch, ud, d, try_again=True):
@@ -243,7 +234,12 @@ class Wget(FetchMethod):
                         fetch.connection_cache.remove_connection(h.host, h.port)
                     raise urllib.error.URLError(err)
                 else:
-                    r = h.getresponse()
+                    try:
+                        r = h.getresponse()
+                    except TimeoutError as e:
+                        if fetch.connection_cache:
+                            fetch.connection_cache.remove_connection(h.host, h.port)
+                        raise TimeoutError(e)
 
                 # Pick apart the HTTPResponse object to get the addinfourl
                 # object initialized properly.
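The wrapper added around getresponse() above follows a simple pattern: if reading the response times out, drop the pooled connection so a stale socket is not reused on the next status check, then re-raise. A minimal standalone illustration, with a hypothetical pool object standing in for the fetcher's connection cache:

    def getresponse_or_evict(h, pool):
        """h is an http.client.HTTPConnection; pool is a hypothetical connection cache."""
        try:
            return h.getresponse()
        except TimeoutError:
            if pool is not None:
                pool.remove_connection(h.host, h.port)  # hypothetical cache API
            raise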
@@ -304,13 +300,45 @@ class Wget(FetchMethod):
 
         class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
             """
-            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
-            when we want to follow redirects using the original method.
+            urllib2.HTTPRedirectHandler before 3.13 has two flaws:
+
+            It resets the method to GET on redirect when we want to follow
+            redirects using the original method (typically HEAD). This was fixed
+            in 759e8e7.
+
+            It also doesn't handle 308 (Permanent Redirect). This was fixed in
+            c379bc5.
+
+            Until we depend on Python 3.13 onwards, copy the redirect_request
+            method to fix these issues.
             """
             def redirect_request(self, req, fp, code, msg, headers, newurl):
-                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
-                newreq.get_method = req.get_method
-                return newreq
+                m = req.get_method()
+                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+                    or code in (301, 302, 303) and m == "POST")):
+                    raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
+
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib.request, in this case). In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+
+                # Be conciliant with URIs containing a space. This is mainly
+                # redundant with the more complete encoding done in http_error_302(),
+                # but it is kept for compatibility with other callers.
+                newurl = newurl.replace(' ', '%20')
+
+                CONTENT_HEADERS = ("content-length", "content-type")
+                newheaders = {k: v for k, v in req.headers.items()
+                              if k.lower() not in CONTENT_HEADERS}
+                return urllib.request.Request(newurl,
+                                              method="HEAD" if m == "HEAD" else "GET",
+                                              headers=newheaders,
+                                              origin_req_host=req.origin_req_host,
+                                              unverifiable=True)
+
+            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
 
         # We need to update the environment here as both the proxy and HTTPS
         # handlers need variables set. The proxy needs http_proxy and friends to
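As a usage sketch (not part of the patch), the handler defined above plugs into build_opener like any other urllib handler, so a HEAD probe keeps its method across 301/302/303/307/308 responses instead of degrading to GET:

    import urllib.request

    # Assumes FixedHTTPRedirectHandler from the hunk above is in scope; the URL is
    # only an example.
    opener = urllib.request.build_opener(FixedHTTPRedirectHandler)
    req = urllib.request.Request("https://example.com/foo-1.0.tar.gz", method="HEAD")
    with opener.open(req, timeout=100) as resp:
        print(resp.status, resp.url)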
@@ -343,14 +371,14 @@ class Wget(FetchMethod):
         opener = urllib.request.build_opener(*handlers)
 
         try:
-            uri_base = ud.url.split(";")[0]
-            uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+            parts = urllib.parse.urlparse(ud.url.split(";")[0])
+            uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
             r = urllib.request.Request(uri)
             r.get_method = lambda: "HEAD"
             # Some servers (FusionForge, as used on Alioth) require that the
             # optional Accept header is set.
             r.add_header("Accept", "*/*")
-            r.add_header("User-Agent", self.user_agent)
+            r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
             def add_basic_auth(login_str, request):
                 '''Adds Basic auth to http request, pass in login:password as string'''
                 import base64
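For reference, a quick illustration (not from the patch) of what urlparse() yields for the rebuilt URI: the BitBake parameters after the first ';' are stripped first, and netloc carries the host together with any explicit port or userinfo present in the URL. The URL below is only an example:

    import urllib.parse

    url = "https://example.com:8443/releases/foo-1.0.tar.gz;downloadfilename=foo.tar.gz"
    parts = urllib.parse.urlparse(url.split(";")[0])
    uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
    # uri == "https://example.com:8443/releases/foo-1.0.tar.gz"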
@@ -370,7 +398,7 @@ class Wget(FetchMethod):
             except (FileNotFoundError, netrc.NetrcParseError):
                 pass
 
-            with opener.open(r, timeout=30) as response:
+            with opener.open(r, timeout=100) as response:
                 pass
         except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
             if try_again:
@@ -457,7 +485,7 @@ class Wget(FetchMethod):
         f = tempfile.NamedTemporaryFile()
         with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
             fetchcmd = self.basecmd
-            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
+            fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
             try:
                 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                 fetchresult = f.read()
@@ -617,13 +645,17 @@ class Wget(FetchMethod):
 
         sanity check to ensure same name and type.
         """
-        package = ud.path.split("/")[-1]
+        if 'downloadfilename' in ud.parm:
+            package = ud.parm['downloadfilename']
+        else:
+            package = ud.path.split("/")[-1]
         current_version = ['', d.getVar('PV'), '']
 
         """possible to have no version in pkg name, such as spectrum-fw"""
         if not re.search(r"\d+", package):
             current_version[1] = re.sub('_', '.', current_version[1])
             current_version[1] = re.sub('-', '.', current_version[1])
+            bb.debug(3, "latest_versionstring: no version found in %s" % package)
             return (current_version[1], '')
 
         package_regex = self._init_regexes(package, ud, d)
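A small sketch (not from the patch) of the effect of the last hunk: when a SRC_URI carries a downloadfilename parameter, the name fed to the version regexes is now the saved file name rather than the final URL path component. ud.parm is modelled here as a plain dict:

    def pick_package_name(path, parm):
        """Mirrors the selection logic above, with the ud fields passed in directly."""
        if 'downloadfilename' in parm:
            return parm['downloadfilename']
        return path.split("/")[-1]

    # e.g. a release URL whose last path component is not a package-style filename:
    print(pick_package_name("/archive/v1.2.3", {"downloadfilename": "foo-1.2.3.tar.gz"}))
    # -> foo-1.2.3.tar.gz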