diff options
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r-- | bitbake/lib/bb/fetch2/wget.py | 108 |
1 files changed, 70 insertions, 38 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index fbfa6938ac..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py | |||
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler): | |||
53 | class Wget(FetchMethod): | 53 | class Wget(FetchMethod): |
54 | """Class to fetch urls via 'wget'""" | 54 | """Class to fetch urls via 'wget'""" |
55 | 55 | ||
56 | # CDNs like CloudFlare may do a 'browser integrity test' which can fail | ||
57 | # with the standard wget/urllib User-Agent, so pretend to be a modern | ||
58 | # browser. | ||
59 | user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" | ||
60 | |||
61 | def check_certs(self, d): | 56 | def check_certs(self, d): |
62 | """ | 57 | """ |
63 | Should certificates be checked? | 58 | Should certificates be checked? |
@@ -83,11 +78,11 @@ class Wget(FetchMethod): | |||
83 | else: | 78 | else: |
84 | ud.basename = os.path.basename(ud.path) | 79 | ud.basename = os.path.basename(ud.path) |
85 | 80 | ||
86 | ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) | 81 | ud.localfile = ud.basename |
87 | if not ud.localfile: | 82 | if not ud.localfile: |
88 | ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) | 83 | ud.localfile = ud.host + ud.path.replace("/", ".") |
89 | 84 | ||
90 | self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30" | 85 | self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100" |
91 | 86 | ||
92 | if ud.type == 'ftp' or ud.type == 'ftps': | 87 | if ud.type == 'ftp' or ud.type == 'ftps': |
93 | self.basecmd += " --passive-ftp" | 88 | self.basecmd += " --passive-ftp" |
@@ -101,16 +96,17 @@ class Wget(FetchMethod): | |||
101 | 96 | ||
102 | logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) | 97 | logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) |
103 | bb.fetch2.check_network_access(d, command, ud.url) | 98 | bb.fetch2.check_network_access(d, command, ud.url) |
104 | runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) | 99 | runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir) |
105 | 100 | ||
106 | def download(self, ud, d): | 101 | def download(self, ud, d): |
107 | """Fetch urls""" | 102 | """Fetch urls""" |
108 | 103 | ||
109 | fetchcmd = self.basecmd | 104 | fetchcmd = self.basecmd |
110 | 105 | ||
111 | localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" | 106 | dldir = os.path.realpath(d.getVar("DL_DIR")) |
107 | localpath = os.path.join(dldir, ud.localfile) + ".tmp" | ||
112 | bb.utils.mkdirhier(os.path.dirname(localpath)) | 108 | bb.utils.mkdirhier(os.path.dirname(localpath)) |
113 | fetchcmd += " -O %s" % shlex.quote(localpath) | 109 | fetchcmd += " --output-document=%s" % shlex.quote(localpath) |
114 | 110 | ||
115 | if ud.user and ud.pswd: | 111 | if ud.user and ud.pswd: |
116 | fetchcmd += " --auth-no-challenge" | 112 | fetchcmd += " --auth-no-challenge" |
@@ -126,14 +122,18 @@ class Wget(FetchMethod): | |||
126 | fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) | 122 | fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) |
127 | 123 | ||
128 | uri = ud.url.split(";")[0] | 124 | uri = ud.url.split(";")[0] |
129 | if os.path.exists(ud.localpath): | 125 | fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri) |
130 | # file exists, but we didnt complete it.. trying again.. | ||
131 | fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri) | ||
132 | else: | ||
133 | fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri) | ||
134 | |||
135 | self._runwget(ud, d, fetchcmd, False) | 126 | self._runwget(ud, d, fetchcmd, False) |
136 | 127 | ||
128 | # Sanity check since wget can pretend it succeed when it didn't | ||
129 | # Also, this used to happen if sourceforge sent us to the mirror page | ||
130 | if not os.path.exists(localpath): | ||
131 | raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri) | ||
132 | |||
133 | if os.path.getsize(localpath) == 0: | ||
134 | os.remove(localpath) | ||
135 | raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) | ||
136 | |||
137 | # Try and verify any checksum now, meaning if it isn't correct, we don't remove the | 137 | # Try and verify any checksum now, meaning if it isn't correct, we don't remove the |
138 | # original file, which might be a race (imagine two recipes referencing the same | 138 | # original file, which might be a race (imagine two recipes referencing the same |
139 | # source, one with an incorrect checksum) | 139 | # source, one with an incorrect checksum) |
@@ -143,15 +143,6 @@ class Wget(FetchMethod): | |||
143 | # Our lock prevents multiple writers but mirroring code may grab incomplete files | 143 | # Our lock prevents multiple writers but mirroring code may grab incomplete files |
144 | os.rename(localpath, localpath[:-4]) | 144 | os.rename(localpath, localpath[:-4]) |
145 | 145 | ||
146 | # Sanity check since wget can pretend it succeed when it didn't | ||
147 | # Also, this used to happen if sourceforge sent us to the mirror page | ||
148 | if not os.path.exists(ud.localpath): | ||
149 | raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) | ||
150 | |||
151 | if os.path.getsize(ud.localpath) == 0: | ||
152 | os.remove(ud.localpath) | ||
153 | raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) | ||
154 | |||
155 | return True | 146 | return True |
156 | 147 | ||
157 | def checkstatus(self, fetch, ud, d, try_again=True): | 148 | def checkstatus(self, fetch, ud, d, try_again=True): |
@@ -243,7 +234,12 @@ class Wget(FetchMethod): | |||
243 | fetch.connection_cache.remove_connection(h.host, h.port) | 234 | fetch.connection_cache.remove_connection(h.host, h.port) |
244 | raise urllib.error.URLError(err) | 235 | raise urllib.error.URLError(err) |
245 | else: | 236 | else: |
246 | r = h.getresponse() | 237 | try: |
238 | r = h.getresponse() | ||
239 | except TimeoutError as e: | ||
240 | if fetch.connection_cache: | ||
241 | fetch.connection_cache.remove_connection(h.host, h.port) | ||
242 | raise TimeoutError(e) | ||
247 | 243 | ||
248 | # Pick apart the HTTPResponse object to get the addinfourl | 244 | # Pick apart the HTTPResponse object to get the addinfourl |
249 | # object initialized properly. | 245 | # object initialized properly. |
@@ -304,13 +300,45 @@ class Wget(FetchMethod): | |||
304 | 300 | ||
305 | class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler): | 301 | class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler): |
306 | """ | 302 | """ |
307 | urllib2.HTTPRedirectHandler resets the method to GET on redirect, | 303 | urllib2.HTTPRedirectHandler before 3.13 has two flaws: |
308 | when we want to follow redirects using the original method. | 304 | |
305 | It resets the method to GET on redirect when we want to follow | ||
306 | redirects using the original method (typically HEAD). This was fixed | ||
307 | in 759e8e7. | ||
308 | |||
309 | It also doesn't handle 308 (Permanent Redirect). This was fixed in | ||
310 | c379bc5. | ||
311 | |||
312 | Until we depend on Python 3.13 onwards, copy the redirect_request | ||
313 | method to fix these issues. | ||
309 | """ | 314 | """ |
310 | def redirect_request(self, req, fp, code, msg, headers, newurl): | 315 | def redirect_request(self, req, fp, code, msg, headers, newurl): |
311 | newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) | 316 | m = req.get_method() |
312 | newreq.get_method = req.get_method | 317 | if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") |
313 | return newreq | 318 | or code in (301, 302, 303) and m == "POST")): |
319 | raise urllib.HTTPError(req.full_url, code, msg, headers, fp) | ||
320 | |||
321 | # Strictly (according to RFC 2616), 301 or 302 in response to | ||
322 | # a POST MUST NOT cause a redirection without confirmation | ||
323 | # from the user (of urllib.request, in this case). In practice, | ||
324 | # essentially all clients do redirect in this case, so we do | ||
325 | # the same. | ||
326 | |||
327 | # Be conciliant with URIs containing a space. This is mainly | ||
328 | # redundant with the more complete encoding done in http_error_302(), | ||
329 | # but it is kept for compatibility with other callers. | ||
330 | newurl = newurl.replace(' ', '%20') | ||
331 | |||
332 | CONTENT_HEADERS = ("content-length", "content-type") | ||
333 | newheaders = {k: v for k, v in req.headers.items() | ||
334 | if k.lower() not in CONTENT_HEADERS} | ||
335 | return urllib.request.Request(newurl, | ||
336 | method="HEAD" if m == "HEAD" else "GET", | ||
337 | headers=newheaders, | ||
338 | origin_req_host=req.origin_req_host, | ||
339 | unverifiable=True) | ||
340 | |||
341 | http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302 | ||
314 | 342 | ||
315 | # We need to update the environment here as both the proxy and HTTPS | 343 | # We need to update the environment here as both the proxy and HTTPS |
316 | # handlers need variables set. The proxy needs http_proxy and friends to | 344 | # handlers need variables set. The proxy needs http_proxy and friends to |
@@ -343,14 +371,14 @@ class Wget(FetchMethod): | |||
343 | opener = urllib.request.build_opener(*handlers) | 371 | opener = urllib.request.build_opener(*handlers) |
344 | 372 | ||
345 | try: | 373 | try: |
346 | uri_base = ud.url.split(";")[0] | 374 | parts = urllib.parse.urlparse(ud.url.split(";")[0]) |
347 | uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) | 375 | uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path) |
348 | r = urllib.request.Request(uri) | 376 | r = urllib.request.Request(uri) |
349 | r.get_method = lambda: "HEAD" | 377 | r.get_method = lambda: "HEAD" |
350 | # Some servers (FusionForge, as used on Alioth) require that the | 378 | # Some servers (FusionForge, as used on Alioth) require that the |
351 | # optional Accept header is set. | 379 | # optional Accept header is set. |
352 | r.add_header("Accept", "*/*") | 380 | r.add_header("Accept", "*/*") |
353 | r.add_header("User-Agent", self.user_agent) | 381 | r.add_header("User-Agent", "bitbake/{}".format(bb.__version__)) |
354 | def add_basic_auth(login_str, request): | 382 | def add_basic_auth(login_str, request): |
355 | '''Adds Basic auth to http request, pass in login:password as string''' | 383 | '''Adds Basic auth to http request, pass in login:password as string''' |
356 | import base64 | 384 | import base64 |
@@ -370,7 +398,7 @@ class Wget(FetchMethod): | |||
370 | except (FileNotFoundError, netrc.NetrcParseError): | 398 | except (FileNotFoundError, netrc.NetrcParseError): |
371 | pass | 399 | pass |
372 | 400 | ||
373 | with opener.open(r, timeout=30) as response: | 401 | with opener.open(r, timeout=100) as response: |
374 | pass | 402 | pass |
375 | except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: | 403 | except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: |
376 | if try_again: | 404 | if try_again: |
@@ -457,7 +485,7 @@ class Wget(FetchMethod): | |||
457 | f = tempfile.NamedTemporaryFile() | 485 | f = tempfile.NamedTemporaryFile() |
458 | with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: | 486 | with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: |
459 | fetchcmd = self.basecmd | 487 | fetchcmd = self.basecmd |
460 | fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" | 488 | fetchcmd += " --output-document=%s '%s'" % (f.name, uri) |
461 | try: | 489 | try: |
462 | self._runwget(ud, d, fetchcmd, True, workdir=workdir) | 490 | self._runwget(ud, d, fetchcmd, True, workdir=workdir) |
463 | fetchresult = f.read() | 491 | fetchresult = f.read() |
@@ -617,13 +645,17 @@ class Wget(FetchMethod): | |||
617 | 645 | ||
618 | sanity check to ensure same name and type. | 646 | sanity check to ensure same name and type. |
619 | """ | 647 | """ |
620 | package = ud.path.split("/")[-1] | 648 | if 'downloadfilename' in ud.parm: |
649 | package = ud.parm['downloadfilename'] | ||
650 | else: | ||
651 | package = ud.path.split("/")[-1] | ||
621 | current_version = ['', d.getVar('PV'), ''] | 652 | current_version = ['', d.getVar('PV'), ''] |
622 | 653 | ||
623 | """possible to have no version in pkg name, such as spectrum-fw""" | 654 | """possible to have no version in pkg name, such as spectrum-fw""" |
624 | if not re.search(r"\d+", package): | 655 | if not re.search(r"\d+", package): |
625 | current_version[1] = re.sub('_', '.', current_version[1]) | 656 | current_version[1] = re.sub('_', '.', current_version[1]) |
626 | current_version[1] = re.sub('-', '.', current_version[1]) | 657 | current_version[1] = re.sub('-', '.', current_version[1]) |
658 | bb.debug(3, "latest_versionstring: no version found in %s" % package) | ||
627 | return (current_version[1], '') | 659 | return (current_version[1], '') |
628 | 660 | ||
629 | package_regex = self._init_regexes(package, ud, d) | 661 | package_regex = self._init_regexes(package, ud, d) |