Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--  bitbake/lib/bb/fetch2/wget.py | 111
1 file changed, 73 insertions(+), 38 deletions(-)
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index dc025800e6..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 class Wget(FetchMethod):
     """Class to fetch urls via 'wget'"""
 
-    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
-    # with the standard wget/urllib User-Agent, so pretend to be a modern
-    # browser.
-    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
-
     def check_certs(self, d):
         """
         Should certificates be checked?
@@ -83,11 +78,14 @@ class Wget(FetchMethod):
         else:
             ud.basename = os.path.basename(ud.path)
 
-        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+        ud.localfile = ud.basename
         if not ud.localfile:
-            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
+            ud.localfile = ud.host + ud.path.replace("/", ".")
+
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
 
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"
+        if ud.type == 'ftp' or ud.type == 'ftps':
+            self.basecmd += " --passive-ftp"
 
         if not self.check_certs(d):
             self.basecmd += " --no-check-certificate"
@@ -98,16 +96,17 @@
 
         logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
         bb.fetch2.check_network_access(d, command, ud.url)
-        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
+        runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
 
     def download(self, ud, d):
         """Fetch urls"""
 
         fetchcmd = self.basecmd
 
-        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        dldir = os.path.realpath(d.getVar("DL_DIR"))
+        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
         bb.utils.mkdirhier(os.path.dirname(localpath))
-        fetchcmd += " -O %s" % shlex.quote(localpath)
+        fetchcmd += " --output-document=%s" % shlex.quote(localpath)
 
         if ud.user and ud.pswd:
             fetchcmd += " --auth-no-challenge"
@@ -123,14 +122,18 @@
             fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
 
         uri = ud.url.split(";")[0]
-        if os.path.exists(ud.localpath):
-            # file exists, but we didnt complete it.. trying again..
-            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
-        else:
-            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
-
+        fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
         self._runwget(ud, d, fetchcmd, False)
 
+        # Sanity check since wget can pretend it succeed when it didn't
+        # Also, this used to happen if sourceforge sent us to the mirror page
+        if not os.path.exists(localpath):
+            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
+
+        if os.path.getsize(localpath) == 0:
+            os.remove(localpath)
+            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
+
         # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
         # original file, which might be a race (imagine two recipes referencing the same
         # source, one with an incorrect checksum)
@@ -140,15 +143,6 @@
         # Our lock prevents multiple writers but mirroring code may grab incomplete files
         os.rename(localpath, localpath[:-4])
 
-        # Sanity check since wget can pretend it succeed when it didn't
-        # Also, this used to happen if sourceforge sent us to the mirror page
-        if not os.path.exists(ud.localpath):
-            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
-
-        if os.path.getsize(ud.localpath) == 0:
-            os.remove(ud.localpath)
-            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
-
         return True
 
     def checkstatus(self, fetch, ud, d, try_again=True):
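For illustration, here is a minimal standalone sketch of the download-then-verify-then-rename flow the hunks above implement. It uses a plain subprocess call and generic exceptions rather than BitBake's runfetchcmd/FetchError, and the paths and default command are placeholders, so treat it as an approximation of the fetcher's behaviour rather than its implementation.

import os
import shlex
import subprocess

def fetch_to_tmp(uri, dldir, localfile, basecmd="wget --tries=2 --timeout=100"):
    # Download to a ".tmp" path first, mirroring the hunks above.
    localpath = os.path.join(os.path.realpath(dldir), localfile) + ".tmp"
    os.makedirs(os.path.dirname(localpath), exist_ok=True)

    cmd = "%s --continue --output-document=%s %s" % (
        basecmd, shlex.quote(localpath), shlex.quote(uri))
    subprocess.run(cmd, shell=True, check=True)

    # wget can exit 0 without leaving a usable file (e.g. a mirror landing
    # page), so verify before exposing the file under its final name.
    if not os.path.exists(localpath):
        raise RuntimeError("fetch of %s reported success but %s is missing" % (uri, localpath))
    if os.path.getsize(localpath) == 0:
        os.remove(localpath)
        raise RuntimeError("fetch of %s produced a zero-size file" % uri)

    final = localpath[:-len(".tmp")]
    os.rename(localpath, final)
    return final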
@@ -240,7 +234,12 @@
                         fetch.connection_cache.remove_connection(h.host, h.port)
                     raise urllib.error.URLError(err)
                 else:
-                    r = h.getresponse()
+                    try:
+                        r = h.getresponse()
+                    except TimeoutError as e:
+                        if fetch.connection_cache:
+                            fetch.connection_cache.remove_connection(h.host, h.port)
+                        raise TimeoutError(e)
 
                 # Pick apart the HTTPResponse object to get the addinfourl
                 # object initialized properly.
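The hunk above evicts a timed-out cached connection before the error propagates, since a connection whose response never arrived is in an unknown state. A minimal sketch of that pattern, where connection_cache is a stand-in object that only needs a remove_connection(host, port) method:

import http.client

def get_response_evicting_on_timeout(conn: http.client.HTTPConnection, connection_cache, host, port):
    # If getresponse() times out, drop the connection from the cache so it
    # is never reused, then let the timeout propagate to the caller.
    try:
        return conn.getresponse()
    except TimeoutError:
        if connection_cache:
            connection_cache.remove_connection(host, port)
        raise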
@@ -301,13 +300,45 @@
 
         class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
             """
-            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
-            when we want to follow redirects using the original method.
+            urllib2.HTTPRedirectHandler before 3.13 has two flaws:
+
+            It resets the method to GET on redirect when we want to follow
+            redirects using the original method (typically HEAD). This was fixed
+            in 759e8e7.
+
+            It also doesn't handle 308 (Permanent Redirect). This was fixed in
+            c379bc5.
+
+            Until we depend on Python 3.13 onwards, copy the redirect_request
+            method to fix these issues.
             """
             def redirect_request(self, req, fp, code, msg, headers, newurl):
-                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
-                newreq.get_method = req.get_method
-                return newreq
+                m = req.get_method()
+                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+                         or code in (301, 302, 303) and m == "POST")):
+                    raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
+
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib.request, in this case). In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+
+                # Be conciliant with URIs containing a space. This is mainly
+                # redundant with the more complete encoding done in http_error_302(),
+                # but it is kept for compatibility with other callers.
+                newurl = newurl.replace(' ', '%20')
+
+                CONTENT_HEADERS = ("content-length", "content-type")
+                newheaders = {k: v for k, v in req.headers.items()
+                              if k.lower() not in CONTENT_HEADERS}
+                return urllib.request.Request(newurl,
+                                              method="HEAD" if m == "HEAD" else "GET",
+                                              headers=newheaders,
+                                              origin_req_host=req.origin_req_host,
+                                              unverifiable=True)
+
+            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
 
         # We need to update the environment here as both the proxy and HTTPS
         # handlers need variables set. The proxy needs http_proxy and friends to
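For context, this is roughly how a custom redirect handler of this kind is wired into an opener and used for a HEAD probe. The class name and URL below are placeholders, and only the 308 aliasing from the hunk is reproduced, not the full redirect_request copy:

import urllib.request

class Redirect308Handler(urllib.request.HTTPRedirectHandler):
    # Treat 308 (Permanent Redirect) the same way as 301/302/307.
    http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

# build_opener() replaces the stock HTTPRedirectHandler with the subclass.
opener = urllib.request.build_opener(Redirect308Handler)
req = urllib.request.Request("https://example.com/artifact.tar.gz", method="HEAD")
with opener.open(req, timeout=30) as resp:
    print(resp.status, resp.geturl())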
@@ -340,14 +371,14 @@
         opener = urllib.request.build_opener(*handlers)
 
         try:
-            uri_base = ud.url.split(";")[0]
-            uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+            parts = urllib.parse.urlparse(ud.url.split(";")[0])
+            uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
             r = urllib.request.Request(uri)
             r.get_method = lambda: "HEAD"
             # Some servers (FusionForge, as used on Alioth) require that the
             # optional Accept header is set.
             r.add_header("Accept", "*/*")
-            r.add_header("User-Agent", self.user_agent)
+            r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
             def add_basic_auth(login_str, request):
                 '''Adds Basic auth to http request, pass in login:password as string'''
                 import base64
@@ -367,7 +398,7 @@
             except (FileNotFoundError, netrc.NetrcParseError):
                 pass
 
-            with opener.open(r, timeout=30) as response:
+            with opener.open(r, timeout=100) as response:
                 pass
         except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
             if try_again:
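A standalone sketch of the HEAD-probe pattern these hunks adjust: Accept and User-Agent headers, Basic auth derived from ~/.netrc when an entry exists, and the longer timeout. The helper name and User-Agent string are placeholders, not the fetcher's checkstatus() code:

import base64
import netrc
import urllib.parse
import urllib.request

def head_status(url):
    # HEAD request with the headers the hunks above set.
    req = urllib.request.Request(url, method="HEAD")
    req.add_header("Accept", "*/*")
    req.add_header("User-Agent", "example-fetcher/1.0")

    # Optional Basic auth from ~/.netrc, ignored if the file is absent.
    host = urllib.parse.urlparse(url).hostname
    try:
        auth = netrc.netrc().authenticators(host)
    except (FileNotFoundError, netrc.NetrcParseError):
        auth = None
    if auth:
        login, _, password = auth
        token = base64.b64encode(("%s:%s" % (login, password)).encode("utf-8"))
        req.add_header("Authorization", "Basic %s" % token.decode("utf-8"))

    with urllib.request.urlopen(req, timeout=100) as response:
        return response.status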
@@ -454,7 +485,7 @@
         f = tempfile.NamedTemporaryFile()
         with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
             fetchcmd = self.basecmd
-            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
+            fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
             try:
                 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                 fetchresult = f.read()
@@ -614,13 +645,17 @@
 
         sanity check to ensure same name and type.
         """
-        package = ud.path.split("/")[-1]
+        if 'downloadfilename' in ud.parm:
+            package = ud.parm['downloadfilename']
+        else:
+            package = ud.path.split("/")[-1]
         current_version = ['', d.getVar('PV'), '']
 
         """possible to have no version in pkg name, such as spectrum-fw"""
         if not re.search(r"\d+", package):
             current_version[1] = re.sub('_', '.', current_version[1])
             current_version[1] = re.sub('-', '.', current_version[1])
+            bb.debug(3, "latest_versionstring: no version found in %s" % package)
             return (current_version[1], '')
 
         package_regex = self._init_regexes(package, ud, d)
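A small sketch of the selection logic this hunk adds: prefer an explicit downloadfilename parameter over the last URL path component, and note whether the resulting name carries any version digits at all. Here parm is a plain dict standing in for ud.parm, and the helper is illustrative rather than part of the fetcher:

import re

def candidate_package_name(path, parm):
    # Prefer an explicit downloadfilename over the last path component.
    if 'downloadfilename' in parm:
        name = parm['downloadfilename']
    else:
        name = path.split("/")[-1]
    # A name without digits (e.g. "spectrum-fw") carries no version to parse,
    # which is the case the added bb.debug() call now logs.
    return name, re.search(r"\d+", name) is not None

# candidate_package_name("/pub/source.tar.gz", {"downloadfilename": "foo-1.2.3.tar.gz"})
# -> ("foo-1.2.3.tar.gz", True)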