Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
 bitbake/lib/bb/fetch2/wget.py | 181 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 111 insertions(+), 70 deletions(-)
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 6d82f3af07..fbfa6938ac 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod
 from bb.fetch2 import FetchError
 from bb.fetch2 import logger
 from bb.fetch2 import runfetchcmd
-from bb.utils import export_proxies
 from bs4 import BeautifulSoup
 from bs4 import SoupStrainer
 
@@ -52,18 +51,24 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 
 
 class Wget(FetchMethod):
+    """Class to fetch urls via 'wget'"""
 
     # CDNs like CloudFlare may do a 'browser integrity test' which can fail
     # with the standard wget/urllib User-Agent, so pretend to be a modern
     # browser.
     user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
 
-    """Class to fetch urls via 'wget'"""
+    def check_certs(self, d):
+        """
+        Should certificates be checked?
+        """
+        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
+
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with wget.
         """
-        return ud.type in ['http', 'https', 'ftp']
+        return ud.type in ['http', 'https', 'ftp', 'ftps']
 
     def recommends_checksum(self, urldata):
         return True
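
A quick illustration of the new BB_CHECK_SSL_CERTS knob (the helper below is hypothetical, mirroring the expression in check_certs() above): verification stays on unless the variable is explicitly "0".

    # Illustrative sketch, not part of the patch: d.getVar() returns None
    # when BB_CHECK_SSL_CERTS is unset, so the `or "1"` makes certificate
    # verification the default.
    def check_certs_like(value):
        return (value or "1") != "0"

    assert check_certs_like(None) is True   # unset: verify certificates
    assert check_certs_like("1") is True
    assert check_certs_like("0") is False   # the only value that disables checking
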
@@ -82,7 +87,13 @@ class Wget(FetchMethod):
         if not ud.localfile:
             ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
 
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30"
+
+        if ud.type == 'ftp' or ud.type == 'ftps':
+            self.basecmd += " --passive-ftp"
+
+        if not self.check_certs(d):
+            self.basecmd += " --no-check-certificate"
 
     def _runwget(self, ud, d, command, quiet, workdir=None):
 
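
With this change the wget command line is assembled per scheme instead of hard-coding the FTP and TLS options. A sketch of the resulting composition (build_basecmd is a hypothetical stand-in for the code above; FETCHCMD_wget can still override the base entirely):

    # Hypothetical condensation of the logic above.
    def build_basecmd(scheme, check_certs):
        basecmd = "/usr/bin/env wget -t 2 -T 30"
        if scheme in ("ftp", "ftps"):
            basecmd += " --passive-ftp"            # FTP-only option now
        if not check_certs:
            basecmd += " --no-check-certificate"   # opt-out, no longer the default
        return basecmd

    assert build_basecmd("https", True) == "/usr/bin/env wget -t 2 -T 30"
    assert build_basecmd("ftp", True).endswith("--passive-ftp")
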
@@ -97,13 +108,22 @@ class Wget(FetchMethod):
 
         fetchcmd = self.basecmd
 
-        if 'downloadfilename' in ud.parm:
-            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
-            bb.utils.mkdirhier(os.path.dirname(localpath))
-            fetchcmd += " -O %s" % shlex.quote(localpath)
+        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
+        bb.utils.mkdirhier(os.path.dirname(localpath))
+        fetchcmd += " -O %s" % shlex.quote(localpath)
 
         if ud.user and ud.pswd:
-            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+            fetchcmd += " --auth-no-challenge"
+            if ud.parm.get("redirectauth", "1") == "1":
+                # An undocumented feature of wget is that if the
+                # username/password are specified on the URI, wget will only
+                # send the Authorization header to the first host and not to
+                # any hosts that it is redirected to. With the increasing
+                # usage of temporary AWS URLs, this difference now matters as
+                # AWS will reject any request that has authentication both in
+                # the query parameters (from the redirect) and in the
+                # Authorization header.
+                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
 
         uri = ud.url.split(";")[0]
         if os.path.exists(ud.localpath):
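
The new redirectauth URL parameter defaults to "1", i.e. credentials are passed via --user/--password so they survive redirects; setting ;redirectauth=0 on a SRC_URI omits those flags, leaving any credentials embedded in the URI, which wget sends only to the first host. A hypothetical condensation of that decision:

    # Sketch only; 'parm' stands in for ud.parm from the code above.
    def auth_args(user, pswd, parm):
        args = " --auth-no-challenge"
        if parm.get("redirectauth", "1") == "1":
            # Default: wget re-sends the Authorization header across redirects.
            args += " --user=%s --password=%s" % (user, pswd)
        # With redirectauth=0, redirect targets such as presigned AWS URLs never
        # see an Authorization header alongside their query-string credentials.
        return args
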
@@ -114,6 +134,15 @@ class Wget(FetchMethod):
 
         self._runwget(ud, d, fetchcmd, False)
 
+        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+        # original file, which might be a race (imagine two recipes referencing the same
+        # source, one with an incorrect checksum)
+        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
+        # Remove the ".tmp" and move the file into position atomically
+        # Our lock prevents multiple writers but mirroring code may grab incomplete files
+        os.rename(localpath, localpath[:-4])
+
         # Sanity check since wget can pretend it succeed when it didn't
         # Also, this used to happen if sourceforge sent us to the mirror page
         if not os.path.exists(ud.localpath):
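
The download now lands in a ".tmp" sibling and is only renamed into place after checksum verification. A minimal sketch of the pattern (helper names are illustrative):

    import os

    def fetch_atomically(run_fetch, verify, final_path):
        # Fetch into a temporary sibling, verify, then rename into place.
        # os.rename() is atomic within one filesystem, so concurrent readers
        # (e.g. mirror handling) never see a partially written final_path,
        # and a failed checksum never clobbers an existing good download.
        tmp_path = final_path + ".tmp"
        run_fetch(tmp_path)    # e.g. wget -O tmp_path <uri>
        verify(tmp_path)       # raises on checksum mismatch
        os.rename(tmp_path, final_path)
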
@@ -209,7 +238,7 @@ class Wget(FetchMethod):
                     # We let the request fail and expect it to be
                     # tried once more ("try_again" in check_status()),
                     # with the dead connection removed from the cache.
-                    # If it still fails, we give up, which can happend for bad
+                    # If it still fails, we give up, which can happen for bad
                     # HTTP proxy settings.
                     fetch.connection_cache.remove_connection(h.host, h.port)
                     raise urllib.error.URLError(err)
@@ -282,64 +311,76 @@ class Wget(FetchMethod):
             newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
             newreq.get_method = req.get_method
             return newreq
-        exported_proxies = export_proxies(d)
-
-        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
-        if exported_proxies:
-            handlers.append(urllib.request.ProxyHandler())
-        handlers.append(CacheHTTPHandler())
-        # Since Python 2.7.9 ssl cert validation is enabled by default
-        # see PEP-0476, this causes verification errors on some https servers
-        # so disable by default.
-        import ssl
-        if hasattr(ssl, '_create_unverified_context'):
-            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
-        opener = urllib.request.build_opener(*handlers)
-
-        try:
-            uri = ud.url.split(";")[0]
-            r = urllib.request.Request(uri)
-            r.get_method = lambda: "HEAD"
-            # Some servers (FusionForge, as used on Alioth) require that the
-            # optional Accept header is set.
-            r.add_header("Accept", "*/*")
-            r.add_header("User-Agent", self.user_agent)
-            def add_basic_auth(login_str, request):
-                '''Adds Basic auth to http request, pass in login:password as string'''
-                import base64
-                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
-                authheader = "Basic %s" % encodeuser
-                r.add_header("Authorization", authheader)
-
-            if ud.user and ud.pswd:
-                add_basic_auth(ud.user + ':' + ud.pswd, r)
 
-            try:
-                import netrc
-                n = netrc.netrc()
-                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
-                add_basic_auth("%s:%s" % (login, password), r)
-            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
-                pass
-
-            with opener.open(r) as response:
-                pass
-        except urllib.error.URLError as e:
-            if try_again:
-                logger.debug2("checkstatus: trying again")
-                return self.checkstatus(fetch, ud, d, False)
-            else:
-                # debug for now to avoid spamming the logs in e.g. remote sstate searches
-                logger.debug2("checkstatus() urlopen failed: %s" % e)
-                return False
-        except ConnectionResetError as e:
-            if try_again:
-                logger.debug2("checkstatus: trying again")
-                return self.checkstatus(fetch, ud, d, False)
-            else:
-                # debug for now to avoid spamming the logs in e.g. remote sstate searches
-                logger.debug2("checkstatus() urlopen failed: %s" % e)
-                return False
+        # We need to update the environment here as both the proxy and HTTPS
+        # handlers need variables set. The proxy needs http_proxy and friends to
+        # be set, and HTTPSHandler ends up calling into openssl to load the
+        # certificates. In buildtools configurations this will be looking at the
+        # wrong place for certificates by default: we set SSL_CERT_FILE to the
+        # right location in the buildtools environment script but as BitBake
+        # prunes the environment this is lost. When binaries are executed
+        # runfetchcmd ensures these values are in the environment, but this is
+        # pure Python so we need to update the environment.
+        #
+        # Avoid trampling the environment too much by using bb.utils.environment
+        # to scope the changes to the build_opener request, which is when the
+        # environment lookups happen.
+        newenv = bb.fetch2.get_fetcher_environment(d)
+
+        with bb.utils.environment(**newenv):
+            import ssl
+
+            if self.check_certs(d):
+                context = ssl.create_default_context()
+            else:
+                context = ssl._create_unverified_context()
+
+            handlers = [FixedHTTPRedirectHandler,
+                        HTTPMethodFallback,
+                        urllib.request.ProxyHandler(),
+                        CacheHTTPHandler(),
+                        urllib.request.HTTPSHandler(context=context)]
+            opener = urllib.request.build_opener(*handlers)
+
+            try:
+                uri_base = ud.url.split(";")[0]
+                uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path)
+                r = urllib.request.Request(uri)
+                r.get_method = lambda: "HEAD"
+                # Some servers (FusionForge, as used on Alioth) require that the
+                # optional Accept header is set.
+                r.add_header("Accept", "*/*")
+                r.add_header("User-Agent", self.user_agent)
+                def add_basic_auth(login_str, request):
+                    '''Adds Basic auth to http request, pass in login:password as string'''
+                    import base64
+                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
+                    authheader = "Basic %s" % encodeuser
+                    r.add_header("Authorization", authheader)
+
+                if ud.user and ud.pswd:
+                    add_basic_auth(ud.user + ':' + ud.pswd, r)
+
+                try:
+                    import netrc
+                    auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
+                    if auth_data:
+                        login, _, password = auth_data
+                        add_basic_auth("%s:%s" % (login, password), r)
+                except (FileNotFoundError, netrc.NetrcParseError):
+                    pass
+
+                with opener.open(r, timeout=30) as response:
+                    pass
+            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
+                if try_again:
+                    logger.debug2("checkstatus: trying again")
+                    return self.checkstatus(fetch, ud, d, False)
+                else:
+                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
+                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri, e))
+                    return False
+
         return True
 
     def _parse_path(self, regex, s):
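
checkstatus() is pure Python, so the proxy variables and SSL_CERT_FILE/SSL_CERT_DIR must be visible in os.environ while build_opener() and its handlers run. bb.utils.environment() scopes that update to the with-block; a rough, illustrative reimplementation of such a scoped update (not BitBake's actual code):

    import os
    from contextlib import contextmanager

    @contextmanager
    def scoped_environment(**overrides):
        # Apply the fetcher variables only for the duration of the block,
        # then restore the previous values so the rest of the process is
        # unaffected.
        saved = {k: os.environ.get(k) for k in overrides}
        os.environ.update(overrides)
        try:
            yield
        finally:
            for key, old in saved.items():
                if old is None:
                    os.environ.pop(key, None)
                else:
                    os.environ[key] = old
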
@@ -472,7 +513,7 @@ class Wget(FetchMethod):
         version_dir = ['', '', '']
         version = ['', '', '']
 
-        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
+        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
         s = dirver_regex.search(dirver)
         if s:
             version_dir[1] = s.group('ver')
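
Switching the inner group's quantifier from '+' to '*' lets single-component versions match, since a separator-delimited pair is no longer required:

    import re

    old = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
    new = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")

    # "libfoo-2" is a hypothetical single-component version directory.
    assert old.search("libfoo-2") is None                      # needed at least "X.Y" before
    assert new.search("libfoo-2").group("ver") == "2"
    assert new.search("libfoo-1.2.3").group("ver") == "1.2.3"  # multi-part still matches
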
@@ -548,7 +589,7 @@ class Wget(FetchMethod):
 
         # src.rpm extension was added only for rpm package. Can be removed if the rpm
         # packaged will always be considered as having to be manually upgraded
-        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
+        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
 
         # match name, version and archive type of a package
         package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
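
Collapsing the tarball branches to tar\.\w+ keeps every previously listed extension matching while also covering newer compressors. A quick check:

    import re

    psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
    # tar.zst was not in the old alternation; \w+ now covers it too.
    for suffix in ("tar.gz", "tar.bz2", "tar.xz", "tar.lz", "tar.zst", "tgz", "src.rpm"):
        assert re.fullmatch(psuffix_regex, suffix), suffix
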
@@ -599,10 +640,10 @@ class Wget(FetchMethod):
         # search for version matches on folders inside the path, like:
         # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
         dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
-        m = dirver_regex.search(path)
+        m = dirver_regex.findall(path)
         if m:
             pn = d.getVar('PN')
-            dirver = m.group('dirver')
+            dirver = m[-1][0]
 
             dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
             if not dirver_pn_regex.search(dirver):
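
re.findall() returns one tuple of capture groups per match, so m[-1][0] takes the dirver group of the last (deepest) version-like path component, where search() stopped at the first:

    import re

    dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
    path = "/sources/foo/1.0/foo-1.2/"   # hypothetical path with two version-like dirs
    m = dirver_regex.findall(path)
    assert [groups[0] for groups in m] == ["1.0", "foo-1.2"]
    assert m[-1][0] == "foo-1.2"         # the deepest directory now wins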