diff options
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r-- | bitbake/lib/bb/fetch2/wget.py | 111 |
1 files changed, 73 insertions, 38 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py index dc025800e6..7e43d3bc97 100644 --- a/bitbake/lib/bb/fetch2/wget.py +++ b/bitbake/lib/bb/fetch2/wget.py | |||
@@ -53,11 +53,6 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler): | |||
53 | class Wget(FetchMethod): | 53 | class Wget(FetchMethod): |
54 | """Class to fetch urls via 'wget'""" | 54 | """Class to fetch urls via 'wget'""" |
55 | 55 | ||
56 | # CDNs like CloudFlare may do a 'browser integrity test' which can fail | ||
57 | # with the standard wget/urllib User-Agent, so pretend to be a modern | ||
58 | # browser. | ||
59 | user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" | ||
60 | |||
61 | def check_certs(self, d): | 56 | def check_certs(self, d): |
62 | """ | 57 | """ |
63 | Should certificates be checked? | 58 | Should certificates be checked? |
@@ -83,11 +78,14 @@ class Wget(FetchMethod): | |||
83 | else: | 78 | else: |
84 | ud.basename = os.path.basename(ud.path) | 79 | ud.basename = os.path.basename(ud.path) |
85 | 80 | ||
86 | ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) | 81 | ud.localfile = ud.basename |
87 | if not ud.localfile: | 82 | if not ud.localfile: |
88 | ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) | 83 | ud.localfile = ud.host + ud.path.replace("/", ".") |
84 | |||
85 | self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100" | ||
89 | 86 | ||
90 | self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp" | 87 | if ud.type == 'ftp' or ud.type == 'ftps': |
88 | self.basecmd += " --passive-ftp" | ||
91 | 89 | ||
92 | if not self.check_certs(d): | 90 | if not self.check_certs(d): |
93 | self.basecmd += " --no-check-certificate" | 91 | self.basecmd += " --no-check-certificate" |
@@ -98,16 +96,17 @@ class Wget(FetchMethod): | |||
98 | 96 | ||
99 | logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) | 97 | logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) |
100 | bb.fetch2.check_network_access(d, command, ud.url) | 98 | bb.fetch2.check_network_access(d, command, ud.url) |
101 | runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) | 99 | runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir) |
102 | 100 | ||
103 | def download(self, ud, d): | 101 | def download(self, ud, d): |
104 | """Fetch urls""" | 102 | """Fetch urls""" |
105 | 103 | ||
106 | fetchcmd = self.basecmd | 104 | fetchcmd = self.basecmd |
107 | 105 | ||
108 | localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" | 106 | dldir = os.path.realpath(d.getVar("DL_DIR")) |
107 | localpath = os.path.join(dldir, ud.localfile) + ".tmp" | ||
109 | bb.utils.mkdirhier(os.path.dirname(localpath)) | 108 | bb.utils.mkdirhier(os.path.dirname(localpath)) |
110 | fetchcmd += " -O %s" % shlex.quote(localpath) | 109 | fetchcmd += " --output-document=%s" % shlex.quote(localpath) |
111 | 110 | ||
112 | if ud.user and ud.pswd: | 111 | if ud.user and ud.pswd: |
113 | fetchcmd += " --auth-no-challenge" | 112 | fetchcmd += " --auth-no-challenge" |
@@ -123,14 +122,18 @@ class Wget(FetchMethod): | |||
123 | fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) | 122 | fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) |
124 | 123 | ||
125 | uri = ud.url.split(";")[0] | 124 | uri = ud.url.split(";")[0] |
126 | if os.path.exists(ud.localpath): | 125 | fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri) |
127 | # file exists, but we didnt complete it.. trying again.. | ||
128 | fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri) | ||
129 | else: | ||
130 | fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri) | ||
131 | |||
132 | self._runwget(ud, d, fetchcmd, False) | 126 | self._runwget(ud, d, fetchcmd, False) |
133 | 127 | ||
128 | # Sanity check since wget can pretend it succeed when it didn't | ||
129 | # Also, this used to happen if sourceforge sent us to the mirror page | ||
130 | if not os.path.exists(localpath): | ||
131 | raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri) | ||
132 | |||
133 | if os.path.getsize(localpath) == 0: | ||
134 | os.remove(localpath) | ||
135 | raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) | ||
136 | |||
134 | # Try and verify any checksum now, meaning if it isn't correct, we don't remove the | 137 | # Try and verify any checksum now, meaning if it isn't correct, we don't remove the |
135 | # original file, which might be a race (imagine two recipes referencing the same | 138 | # original file, which might be a race (imagine two recipes referencing the same |
136 | # source, one with an incorrect checksum) | 139 | # source, one with an incorrect checksum) |
@@ -140,15 +143,6 @@ class Wget(FetchMethod): | |||
140 | # Our lock prevents multiple writers but mirroring code may grab incomplete files | 143 | # Our lock prevents multiple writers but mirroring code may grab incomplete files |
141 | os.rename(localpath, localpath[:-4]) | 144 | os.rename(localpath, localpath[:-4]) |
142 | 145 | ||
143 | # Sanity check since wget can pretend it succeed when it didn't | ||
144 | # Also, this used to happen if sourceforge sent us to the mirror page | ||
145 | if not os.path.exists(ud.localpath): | ||
146 | raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) | ||
147 | |||
148 | if os.path.getsize(ud.localpath) == 0: | ||
149 | os.remove(ud.localpath) | ||
150 | raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) | ||
151 | |||
152 | return True | 146 | return True |
153 | 147 | ||
154 | def checkstatus(self, fetch, ud, d, try_again=True): | 148 | def checkstatus(self, fetch, ud, d, try_again=True): |
@@ -240,7 +234,12 @@ class Wget(FetchMethod): | |||
240 | fetch.connection_cache.remove_connection(h.host, h.port) | 234 | fetch.connection_cache.remove_connection(h.host, h.port) |
241 | raise urllib.error.URLError(err) | 235 | raise urllib.error.URLError(err) |
242 | else: | 236 | else: |
243 | r = h.getresponse() | 237 | try: |
238 | r = h.getresponse() | ||
239 | except TimeoutError as e: | ||
240 | if fetch.connection_cache: | ||
241 | fetch.connection_cache.remove_connection(h.host, h.port) | ||
242 | raise TimeoutError(e) | ||
244 | 243 | ||
245 | # Pick apart the HTTPResponse object to get the addinfourl | 244 | # Pick apart the HTTPResponse object to get the addinfourl |
246 | # object initialized properly. | 245 | # object initialized properly. |
@@ -301,13 +300,45 @@ class Wget(FetchMethod): | |||
301 | 300 | ||
class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
    """
    Redirect handler that keeps HEAD requests as HEAD and follows 308.

    urllib.request.HTTPRedirectHandler before Python 3.13 has two flaws:

    It resets the method to GET on redirect when we want to follow
    redirects using the original method (typically HEAD). This was fixed
    in 759e8e7.

    It also doesn't handle 308 (Permanent Redirect). This was fixed in
    c379bc5.

    Until we depend on Python 3.13 onwards, copy the redirect_request
    method to fix these issues.
    """

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """
        Build the follow-up request for a redirect response.

        Returns a new urllib.request.Request for newurl that uses HEAD
        when the original request was HEAD and GET otherwise, with the
        body-describing headers removed.

        Raises urllib.error.HTTPError when the combination of status
        code and request method must not be redirected automatically.
        """
        m = req.get_method()
        redirectable = (
            code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"
        )
        if not redirectable:
            # HTTPError lives in urllib.error; the bare 'urllib' package has
            # no HTTPError attribute, so referencing it there would raise
            # AttributeError instead of the HTTP error callers expect.
            raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request never carries a body, so drop the headers
        # that describe one.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return urllib.request.Request(newurl,
                                      method="HEAD" if m == "HEAD" else "GET",
                                      headers=newheaders,
                                      origin_req_host=req.origin_req_host,
                                      unverifiable=True)

    # Route 308 responses through the same handling as 302.
    http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
311 | 342 | ||
312 | # We need to update the environment here as both the proxy and HTTPS | 343 | # We need to update the environment here as both the proxy and HTTPS |
313 | # handlers need variables set. The proxy needs http_proxy and friends to | 344 | # handlers need variables set. The proxy needs http_proxy and friends to |
@@ -340,14 +371,14 @@ class Wget(FetchMethod): | |||
340 | opener = urllib.request.build_opener(*handlers) | 371 | opener = urllib.request.build_opener(*handlers) |
341 | 372 | ||
342 | try: | 373 | try: |
343 | uri_base = ud.url.split(";")[0] | 374 | parts = urllib.parse.urlparse(ud.url.split(";")[0]) |
344 | uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) | 375 | uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path) |
345 | r = urllib.request.Request(uri) | 376 | r = urllib.request.Request(uri) |
346 | r.get_method = lambda: "HEAD" | 377 | r.get_method = lambda: "HEAD" |
347 | # Some servers (FusionForge, as used on Alioth) require that the | 378 | # Some servers (FusionForge, as used on Alioth) require that the |
348 | # optional Accept header is set. | 379 | # optional Accept header is set. |
349 | r.add_header("Accept", "*/*") | 380 | r.add_header("Accept", "*/*") |
350 | r.add_header("User-Agent", self.user_agent) | 381 | r.add_header("User-Agent", "bitbake/{}".format(bb.__version__)) |
351 | def add_basic_auth(login_str, request): | 382 | def add_basic_auth(login_str, request): |
352 | '''Adds Basic auth to http request, pass in login:password as string''' | 383 | '''Adds Basic auth to http request, pass in login:password as string''' |
353 | import base64 | 384 | import base64 |
@@ -367,7 +398,7 @@ class Wget(FetchMethod): | |||
367 | except (FileNotFoundError, netrc.NetrcParseError): | 398 | except (FileNotFoundError, netrc.NetrcParseError): |
368 | pass | 399 | pass |
369 | 400 | ||
370 | with opener.open(r, timeout=30) as response: | 401 | with opener.open(r, timeout=100) as response: |
371 | pass | 402 | pass |
372 | except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: | 403 | except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: |
373 | if try_again: | 404 | if try_again: |
@@ -454,7 +485,7 @@ class Wget(FetchMethod): | |||
454 | f = tempfile.NamedTemporaryFile() | 485 | f = tempfile.NamedTemporaryFile() |
455 | with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: | 486 | with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: |
456 | fetchcmd = self.basecmd | 487 | fetchcmd = self.basecmd |
457 | fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" | 488 | fetchcmd += " --output-document=%s '%s'" % (f.name, uri) |
458 | try: | 489 | try: |
459 | self._runwget(ud, d, fetchcmd, True, workdir=workdir) | 490 | self._runwget(ud, d, fetchcmd, True, workdir=workdir) |
460 | fetchresult = f.read() | 491 | fetchresult = f.read() |
@@ -614,13 +645,17 @@ class Wget(FetchMethod): | |||
614 | 645 | ||
615 | sanity check to ensure same name and type. | 646 | sanity check to ensure same name and type. |
616 | """ | 647 | """ |
617 | package = ud.path.split("/")[-1] | 648 | if 'downloadfilename' in ud.parm: |
649 | package = ud.parm['downloadfilename'] | ||
650 | else: | ||
651 | package = ud.path.split("/")[-1] | ||
618 | current_version = ['', d.getVar('PV'), ''] | 652 | current_version = ['', d.getVar('PV'), ''] |
619 | 653 | ||
620 | """possible to have no version in pkg name, such as spectrum-fw""" | 654 | """possible to have no version in pkg name, such as spectrum-fw""" |
621 | if not re.search(r"\d+", package): | 655 | if not re.search(r"\d+", package): |
622 | current_version[1] = re.sub('_', '.', current_version[1]) | 656 | current_version[1] = re.sub('_', '.', current_version[1]) |
623 | current_version[1] = re.sub('-', '.', current_version[1]) | 657 | current_version[1] = re.sub('-', '.', current_version[1]) |
658 | bb.debug(3, "latest_versionstring: no version found in %s" % package) | ||
624 | return (current_version[1], '') | 659 | return (current_version[1], '') |
625 | 660 | ||
626 | package_regex = self._init_regexes(package, ud, d) | 661 | package_regex = self._init_regexes(package, ud, d) |