diff options
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r-- | bitbake/lib/bb/fetch2/wget.py | 181 |
1 files changed, 111 insertions, 70 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py index 6d82f3af07..fbfa6938ac 100644 --- a/bitbake/lib/bb/fetch2/wget.py +++ b/bitbake/lib/bb/fetch2/wget.py | |||
@@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod | |||
26 | from bb.fetch2 import FetchError | 26 | from bb.fetch2 import FetchError |
27 | from bb.fetch2 import logger | 27 | from bb.fetch2 import logger |
28 | from bb.fetch2 import runfetchcmd | 28 | from bb.fetch2 import runfetchcmd |
29 | from bb.utils import export_proxies | ||
30 | from bs4 import BeautifulSoup | 29 | from bs4 import BeautifulSoup |
31 | from bs4 import SoupStrainer | 30 | from bs4 import SoupStrainer |
32 | 31 | ||
@@ -52,18 +51,24 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler): | |||
52 | 51 | ||
53 | 52 | ||
54 | class Wget(FetchMethod): | 53 | class Wget(FetchMethod): |
54 | """Class to fetch urls via 'wget'""" | ||
55 | 55 | ||
56 | # CDNs like CloudFlare may do a 'browser integrity test' which can fail | 56 | # CDNs like CloudFlare may do a 'browser integrity test' which can fail |
57 | # with the standard wget/urllib User-Agent, so pretend to be a modern | 57 | # with the standard wget/urllib User-Agent, so pretend to be a modern |
58 | # browser. | 58 | # browser. |
59 | user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" | 59 | user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" |
60 | 60 | ||
61 | """Class to fetch urls via 'wget'""" | 61 | def check_certs(self, d): |
62 | """ | ||
63 | Should certificates be checked? | ||
64 | """ | ||
65 | return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0" | ||
66 | |||
62 | def supports(self, ud, d): | 67 | def supports(self, ud, d): |
63 | """ | 68 | """ |
64 | Check to see if a given url can be fetched with wget. | 69 | Check to see if a given url can be fetched with wget. |
65 | """ | 70 | """ |
66 | return ud.type in ['http', 'https', 'ftp'] | 71 | return ud.type in ['http', 'https', 'ftp', 'ftps'] |
67 | 72 | ||
68 | def recommends_checksum(self, urldata): | 73 | def recommends_checksum(self, urldata): |
69 | return True | 74 | return True |
@@ -82,7 +87,13 @@ class Wget(FetchMethod): | |||
82 | if not ud.localfile: | 87 | if not ud.localfile: |
83 | ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) | 88 | ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) |
84 | 89 | ||
85 | self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate" | 90 | self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30" |
91 | |||
92 | if ud.type == 'ftp' or ud.type == 'ftps': | ||
93 | self.basecmd += " --passive-ftp" | ||
94 | |||
95 | if not self.check_certs(d): | ||
96 | self.basecmd += " --no-check-certificate" | ||
86 | 97 | ||
87 | def _runwget(self, ud, d, command, quiet, workdir=None): | 98 | def _runwget(self, ud, d, command, quiet, workdir=None): |
88 | 99 | ||
@@ -97,13 +108,22 @@ class Wget(FetchMethod): | |||
97 | 108 | ||
98 | fetchcmd = self.basecmd | 109 | fetchcmd = self.basecmd |
99 | 110 | ||
100 | if 'downloadfilename' in ud.parm: | 111 | localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" |
101 | localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) | 112 | bb.utils.mkdirhier(os.path.dirname(localpath)) |
102 | bb.utils.mkdirhier(os.path.dirname(localpath)) | 113 | fetchcmd += " -O %s" % shlex.quote(localpath) |
103 | fetchcmd += " -O %s" % shlex.quote(localpath) | ||
104 | 114 | ||
105 | if ud.user and ud.pswd: | 115 | if ud.user and ud.pswd: |
106 | fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) | 116 | fetchcmd += " --auth-no-challenge" |
117 | if ud.parm.get("redirectauth", "1") == "1": | ||
118 | # An undocumented feature of wget is that if the | ||
119 | # username/password are specified on the URI, wget will only | ||
120 | # send the Authorization header to the first host and not to | ||
121 | # any hosts that it is redirected to. With the increasing | ||
122 | # usage of temporary AWS URLs, this difference now matters as | ||
123 | # AWS will reject any request that has authentication both in | ||
124 | # the query parameters (from the redirect) and in the | ||
125 | # Authorization header. | ||
126 | fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) | ||
107 | 127 | ||
108 | uri = ud.url.split(";")[0] | 128 | uri = ud.url.split(";")[0] |
109 | if os.path.exists(ud.localpath): | 129 | if os.path.exists(ud.localpath): |
@@ -114,6 +134,15 @@ class Wget(FetchMethod): | |||
114 | 134 | ||
115 | self._runwget(ud, d, fetchcmd, False) | 135 | self._runwget(ud, d, fetchcmd, False) |
116 | 136 | ||
137 | # Try and verify any checksum now, meaning if it isn't correct, we don't remove the | ||
138 | # original file, which might be a race (imagine two recipes referencing the same | ||
139 | # source, one with an incorrect checksum) | ||
140 | bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False) | ||
141 | |||
142 | # Remove the ".tmp" and move the file into position atomically | ||
143 | # Our lock prevents multiple writers but mirroring code may grab incomplete files | ||
144 | os.rename(localpath, localpath[:-4]) | ||
145 | |||
117 | # Sanity check since wget can pretend it succeed when it didn't | 146 | # Sanity check since wget can pretend it succeed when it didn't |
118 | # Also, this used to happen if sourceforge sent us to the mirror page | 147 | # Also, this used to happen if sourceforge sent us to the mirror page |
119 | if not os.path.exists(ud.localpath): | 148 | if not os.path.exists(ud.localpath): |
@@ -209,7 +238,7 @@ class Wget(FetchMethod): | |||
209 | # We let the request fail and expect it to be | 238 | # We let the request fail and expect it to be |
210 | # tried once more ("try_again" in check_status()), | 239 | # tried once more ("try_again" in check_status()), |
211 | # with the dead connection removed from the cache. | 240 | # with the dead connection removed from the cache. |
212 | # If it still fails, we give up, which can happend for bad | 241 | # If it still fails, we give up, which can happen for bad |
213 | # HTTP proxy settings. | 242 | # HTTP proxy settings. |
214 | fetch.connection_cache.remove_connection(h.host, h.port) | 243 | fetch.connection_cache.remove_connection(h.host, h.port) |
215 | raise urllib.error.URLError(err) | 244 | raise urllib.error.URLError(err) |
@@ -282,64 +311,76 @@ class Wget(FetchMethod): | |||
282 | newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) | 311 | newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) |
283 | newreq.get_method = req.get_method | 312 | newreq.get_method = req.get_method |
284 | return newreq | 313 | return newreq |
285 | exported_proxies = export_proxies(d) | ||
286 | |||
287 | handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback] | ||
288 | if exported_proxies: | ||
289 | handlers.append(urllib.request.ProxyHandler()) | ||
290 | handlers.append(CacheHTTPHandler()) | ||
291 | # Since Python 2.7.9 ssl cert validation is enabled by default | ||
292 | # see PEP-0476, this causes verification errors on some https servers | ||
293 | # so disable by default. | ||
294 | import ssl | ||
295 | if hasattr(ssl, '_create_unverified_context'): | ||
296 | handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context())) | ||
297 | opener = urllib.request.build_opener(*handlers) | ||
298 | |||
299 | try: | ||
300 | uri = ud.url.split(";")[0] | ||
301 | r = urllib.request.Request(uri) | ||
302 | r.get_method = lambda: "HEAD" | ||
303 | # Some servers (FusionForge, as used on Alioth) require that the | ||
304 | # optional Accept header is set. | ||
305 | r.add_header("Accept", "*/*") | ||
306 | r.add_header("User-Agent", self.user_agent) | ||
307 | def add_basic_auth(login_str, request): | ||
308 | '''Adds Basic auth to http request, pass in login:password as string''' | ||
309 | import base64 | ||
310 | encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") | ||
311 | authheader = "Basic %s" % encodeuser | ||
312 | r.add_header("Authorization", authheader) | ||
313 | |||
314 | if ud.user and ud.pswd: | ||
315 | add_basic_auth(ud.user + ':' + ud.pswd, r) | ||
316 | 314 | ||
317 | try: | 315 | # We need to update the environment here as both the proxy and HTTPS |
318 | import netrc | 316 | # handlers need variables set. The proxy needs http_proxy and friends to |
319 | n = netrc.netrc() | 317 | # be set, and HTTPSHandler ends up calling into openssl to load the |
320 | login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) | 318 | # certificates. In buildtools configurations this will be looking at the |
321 | add_basic_auth("%s:%s" % (login, password), r) | 319 | # wrong place for certificates by default: we set SSL_CERT_FILE to the |
322 | except (TypeError, ImportError, IOError, netrc.NetrcParseError): | 320 | # right location in the buildtools environment script but as BitBake |
323 | pass | 321 | # prunes the environment this is lost. When binaries are executed |
324 | 322 | # runfetchcmd ensures these values are in the environment, but this is | |
325 | with opener.open(r) as response: | 323 | # pure Python so we need to update the environment. |
326 | pass | 324 | # |
327 | except urllib.error.URLError as e: | 325 | # Avoid tramping the environment too much by using bb.utils.environment |
328 | if try_again: | 326 | # to scope the changes to the build_opener request, which is when the |
329 | logger.debug2("checkstatus: trying again") | 327 | # environment lookups happen. |
330 | return self.checkstatus(fetch, ud, d, False) | 328 | newenv = bb.fetch2.get_fetcher_environment(d) |
329 | |||
330 | with bb.utils.environment(**newenv): | ||
331 | import ssl | ||
332 | |||
333 | if self.check_certs(d): | ||
334 | context = ssl.create_default_context() | ||
331 | else: | 335 | else: |
332 | # debug for now to avoid spamming the logs in e.g. remote sstate searches | 336 | context = ssl._create_unverified_context() |
333 | logger.debug2("checkstatus() urlopen failed: %s" % e) | 337 | |
334 | return False | 338 | handlers = [FixedHTTPRedirectHandler, |
335 | except ConnectionResetError as e: | 339 | HTTPMethodFallback, |
336 | if try_again: | 340 | urllib.request.ProxyHandler(), |
337 | logger.debug2("checkstatus: trying again") | 341 | CacheHTTPHandler(), |
338 | return self.checkstatus(fetch, ud, d, False) | 342 | urllib.request.HTTPSHandler(context=context)] |
339 | else: | 343 | opener = urllib.request.build_opener(*handlers) |
340 | # debug for now to avoid spamming the logs in e.g. remote sstate searches | 344 | |
341 | logger.debug2("checkstatus() urlopen failed: %s" % e) | 345 | try: |
342 | return False | 346 | uri_base = ud.url.split(";")[0] |
347 | uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) | ||
348 | r = urllib.request.Request(uri) | ||
349 | r.get_method = lambda: "HEAD" | ||
350 | # Some servers (FusionForge, as used on Alioth) require that the | ||
351 | # optional Accept header is set. | ||
352 | r.add_header("Accept", "*/*") | ||
353 | r.add_header("User-Agent", self.user_agent) | ||
354 | def add_basic_auth(login_str, request): | ||
355 | '''Adds Basic auth to http request, pass in login:password as string''' | ||
356 | import base64 | ||
357 | encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") | ||
358 | authheader = "Basic %s" % encodeuser | ||
359 | r.add_header("Authorization", authheader) | ||
360 | |||
361 | if ud.user and ud.pswd: | ||
362 | add_basic_auth(ud.user + ':' + ud.pswd, r) | ||
363 | |||
364 | try: | ||
365 | import netrc | ||
366 | auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname) | ||
367 | if auth_data: | ||
368 | login, _, password = auth_data | ||
369 | add_basic_auth("%s:%s" % (login, password), r) | ||
370 | except (FileNotFoundError, netrc.NetrcParseError): | ||
371 | pass | ||
372 | |||
373 | with opener.open(r, timeout=30) as response: | ||
374 | pass | ||
375 | except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: | ||
376 | if try_again: | ||
377 | logger.debug2("checkstatus: trying again") | ||
378 | return self.checkstatus(fetch, ud, d, False) | ||
379 | else: | ||
380 | # debug for now to avoid spamming the logs in e.g. remote sstate searches | ||
381 | logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e)) | ||
382 | return False | ||
383 | |||
343 | return True | 384 | return True |
344 | 385 | ||
345 | def _parse_path(self, regex, s): | 386 | def _parse_path(self, regex, s): |
@@ -472,7 +513,7 @@ class Wget(FetchMethod): | |||
472 | version_dir = ['', '', ''] | 513 | version_dir = ['', '', ''] |
473 | version = ['', '', ''] | 514 | version = ['', '', ''] |
474 | 515 | ||
475 | dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))") | 516 | dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))") |
476 | s = dirver_regex.search(dirver) | 517 | s = dirver_regex.search(dirver) |
477 | if s: | 518 | if s: |
478 | version_dir[1] = s.group('ver') | 519 | version_dir[1] = s.group('ver') |
@@ -548,7 +589,7 @@ class Wget(FetchMethod): | |||
548 | 589 | ||
549 | # src.rpm extension was added only for rpm package. Can be removed if the rpm | 590 | # src.rpm extension was added only for rpm package. Can be removed if the rpm |
550 | # package will always be considered as having to be manually upgraded | 591 | # package will always be considered as having to be manually upgraded |
551 | psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)" | 592 | psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)" |
552 | 593 | ||
553 | # match name, version and archive type of a package | 594 | # match name, version and archive type of a package |
554 | package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" | 595 | package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" |
@@ -599,10 +640,10 @@ class Wget(FetchMethod): | |||
599 | # search for version matches on folders inside the path, like: | 640 | # search for version matches on folders inside the path, like: |
600 | # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz | 641 | # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz |
601 | dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") | 642 | dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") |
602 | m = dirver_regex.search(path) | 643 | m = dirver_regex.findall(path) |
603 | if m: | 644 | if m: |
604 | pn = d.getVar('PN') | 645 | pn = d.getVar('PN') |
605 | dirver = m.group('dirver') | 646 | dirver = m[-1][0] |
606 | 647 | ||
607 | dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) | 648 | dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) |
608 | if not dirver_pn_regex.search(dirver): | 649 | if not dirver_pn_regex.search(dirver): |