Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--  bitbake/lib/bb/fetch2/wget.py  266
1 file changed, 171 insertions(+), 95 deletions(-)
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 6d82f3af07..4d19e2134b 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod
 from bb.fetch2 import FetchError
 from bb.fetch2 import logger
 from bb.fetch2 import runfetchcmd
-from bb.utils import export_proxies
 from bs4 import BeautifulSoup
 from bs4 import SoupStrainer
 
@@ -52,18 +51,19 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
 
 
 class Wget(FetchMethod):
+    """Class to fetch urls via 'wget'"""
 
-    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
-    # with the standard wget/urllib User-Agent, so pretend to be a modern
-    # browser.
-    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
+    def check_certs(self, d):
+        """
+        Should certificates be checked?
+        """
+        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
 
-    """Class to fetch urls via 'wget'"""
     def supports(self, ud, d):
         """
         Check to see if a given url can be fetched with wget.
         """
-        return ud.type in ['http', 'https', 'ftp']
+        return ud.type in ['http', 'https', 'ftp', 'ftps']
 
     def recommends_checksum(self, urldata):
         return True
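
The new check_certs() helper keeps verification on unless BB_CHECK_SSL_CERTS is explicitly set to "0" (for example in local.conf). A minimal sketch of that gating, using a hypothetical stand-in for the BitBake datastore:

    # FakeData is a hypothetical stand-in for the BitBake datastore
    class FakeData:
        def __init__(self, values):
            self._values = values

        def getVar(self, name):
            return self._values.get(name)

    def check_certs(d):
        # Unset, or any value other than "0", keeps verification enabled
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    assert check_certs(FakeData({})) is True
    assert check_certs(FakeData({"BB_CHECK_SSL_CERTS": "0"})) is False
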
@@ -78,11 +78,17 @@ class Wget(FetchMethod):
         else:
             ud.basename = os.path.basename(ud.path)
 
-        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
+        ud.localfile = ud.basename
         if not ud.localfile:
-            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
+            ud.localfile = ud.host + ud.path.replace("/", ".")
 
-        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
+        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
+
+        if ud.type == 'ftp' or ud.type == 'ftps':
+            self.basecmd += " --passive-ftp"
+
+        if not self.check_certs(d):
+            self.basecmd += " --no-check-certificate"
 
     def _runwget(self, ud, d, command, quiet, workdir=None):
 
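
Building basecmd per URL is what lets --passive-ftp apply only to ftp/ftps and --no-check-certificate only when verification is disabled. A rough sketch of the assembly, assuming the default command and a hypothetical helper name:

    def build_basecmd(url_type, certs_enabled):
        # Default base command from the diff; FETCHCMD_wget can override it
        basecmd = "/usr/bin/env wget --tries=2 --timeout=100"
        if url_type in ('ftp', 'ftps'):
            basecmd += " --passive-ftp"
        if not certs_enabled:
            basecmd += " --no-check-certificate"
        return basecmd

    print(build_basecmd('https', True))   # /usr/bin/env wget --tries=2 --timeout=100
    print(build_basecmd('ftps', False))   # ... --passive-ftp --no-check-certificate
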
@@ -90,39 +96,53 @@ class Wget(FetchMethod):
 
         logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
         bb.fetch2.check_network_access(d, command, ud.url)
-        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
+        runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
 
     def download(self, ud, d):
         """Fetch urls"""
 
         fetchcmd = self.basecmd
 
-        if 'downloadfilename' in ud.parm:
-            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
-            bb.utils.mkdirhier(os.path.dirname(localpath))
-            fetchcmd += " -O %s" % shlex.quote(localpath)
+        dldir = os.path.realpath(d.getVar("DL_DIR"))
+        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
+        bb.utils.mkdirhier(os.path.dirname(localpath))
+        fetchcmd += " --output-document=%s" % shlex.quote(localpath)
 
         if ud.user and ud.pswd:
-            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
+            fetchcmd += " --auth-no-challenge"
+            if ud.parm.get("redirectauth", "1") == "1":
+                # An undocumented feature of wget is that if the
+                # username/password are specified on the URI, wget will only
+                # send the Authorization header to the first host and not to
+                # any hosts that it is redirected to. With the increasing
+                # usage of temporary AWS URLs, this difference now matters as
+                # AWS will reject any request that has authentication both in
+                # the query parameters (from the redirect) and in the
+                # Authorization header.
+                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
 
         uri = ud.url.split(";")[0]
-        if os.path.exists(ud.localpath):
-            # file exists, but we didnt complete it.. trying again..
-            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
-        else:
-            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
-
+        fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
         self._runwget(ud, d, fetchcmd, False)
 
         # Sanity check since wget can pretend it succeed when it didn't
         # Also, this used to happen if sourceforge sent us to the mirror page
-        if not os.path.exists(ud.localpath):
-            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
+        if not os.path.exists(localpath):
+            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
 
-        if os.path.getsize(ud.localpath) == 0:
-            os.remove(ud.localpath)
+        if os.path.getsize(localpath) == 0:
+            os.remove(localpath)
             raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
 
+        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+        # original file, which might be a race (imagine two recipes referencing the same
+        # source, one with an incorrect checksum)
+        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
+        # Remove the ".tmp" and move the file into position atomically
+        # Our lock prevents multiple writers but mirroring code may grab incomplete files
+        os.rename(localpath, localpath[:-4])
+
         return True
 
     def checkstatus(self, fetch, ud, d, try_again=True):
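
The download now lands in a ".tmp" file, is checksummed there, and only then renamed over the final name, so concurrent readers such as mirroring code never see a partial or corrupt file under the real name. A minimal sketch of the pattern, with fetch and verify as hypothetical callbacks:

    import os

    def fetch_atomically(final_path, fetch, verify):
        tmp = final_path + ".tmp"
        fetch(tmp)              # may leave a partial file behind on failure
        verify(tmp)             # raise before the rename, never after
        os.rename(tmp, final_path)

os.rename() is atomic only within a single filesystem, which holds here because the ".tmp" file is created next to its destination in DL_DIR.
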
@@ -209,12 +229,17 @@ class Wget(FetchMethod):
                     # We let the request fail and expect it to be
                     # tried once more ("try_again" in check_status()),
                     # with the dead connection removed from the cache.
-                    # If it still fails, we give up, which can happend for bad
+                    # If it still fails, we give up, which can happen for bad
                     # HTTP proxy settings.
                     fetch.connection_cache.remove_connection(h.host, h.port)
                     raise urllib.error.URLError(err)
                 else:
-                    r = h.getresponse()
+                    try:
+                        r = h.getresponse()
+                    except TimeoutError as e:
+                        if fetch.connection_cache:
+                            fetch.connection_cache.remove_connection(h.host, h.port)
+                        raise TimeoutError(e)
 
                 # Pick apart the HTTPResponse object to get the addinfourl
                 # object initialized properly.
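
Catching TimeoutError around getresponse() matters because the connection object lives in a cache: without eviction, a retry would be handed the same dead socket. A sketch of the evict-then-reraise shape, with cache standing in for the fetcher's connection cache:

    def get_response(h, cache):
        try:
            return h.getresponse()
        except TimeoutError:
            if cache:
                # Drop the dead connection so the retry opens a fresh one
                cache.remove_connection(h.host, h.port)
            raise
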
@@ -275,71 +300,118 @@ class Wget(FetchMethod):
 
         class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
             """
-            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
-            when we want to follow redirects using the original method.
+            urllib2.HTTPRedirectHandler before 3.13 has two flaws:
+
+            It resets the method to GET on redirect when we want to follow
+            redirects using the original method (typically HEAD). This was fixed
+            in 759e8e7.
+
+            It also doesn't handle 308 (Permanent Redirect). This was fixed in
+            c379bc5.
+
+            Until we depend on Python 3.13 onwards, copy the redirect_request
+            method to fix these issues.
             """
             def redirect_request(self, req, fp, code, msg, headers, newurl):
-                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
-                newreq.get_method = req.get_method
-                return newreq
-        exported_proxies = export_proxies(d)
+                m = req.get_method()
+                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+                    or code in (301, 302, 303) and m == "POST")):
+                    raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
 
-        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
-        if exported_proxies:
-            handlers.append(urllib.request.ProxyHandler())
-        handlers.append(CacheHTTPHandler())
-        # Since Python 2.7.9 ssl cert validation is enabled by default
-        # see PEP-0476, this causes verification errors on some https servers
-        # so disable by default.
-        import ssl
-        if hasattr(ssl, '_create_unverified_context'):
-            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
-        opener = urllib.request.build_opener(*handlers)
-
-        try:
-            uri = ud.url.split(";")[0]
-            r = urllib.request.Request(uri)
-            r.get_method = lambda: "HEAD"
-            # Some servers (FusionForge, as used on Alioth) require that the
-            # optional Accept header is set.
-            r.add_header("Accept", "*/*")
-            r.add_header("User-Agent", self.user_agent)
-            def add_basic_auth(login_str, request):
-                '''Adds Basic auth to http request, pass in login:password as string'''
-                import base64
-                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
-                authheader = "Basic %s" % encodeuser
-                r.add_header("Authorization", authheader)
-
-            if ud.user and ud.pswd:
-                add_basic_auth(ud.user + ':' + ud.pswd, r)
-
-            try:
-                import netrc
-                n = netrc.netrc()
-                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
-                add_basic_auth("%s:%s" % (login, password), r)
-            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
-                pass
-
-            with opener.open(r) as response:
-                pass
-        except urllib.error.URLError as e:
-            if try_again:
-                logger.debug2("checkstatus: trying again")
-                return self.checkstatus(fetch, ud, d, False)
-            else:
-                # debug for now to avoid spamming the logs in e.g. remote sstate searches
-                logger.debug2("checkstatus() urlopen failed: %s" % e)
-                return False
-        except ConnectionResetError as e:
-            if try_again:
-                logger.debug2("checkstatus: trying again")
-                return self.checkstatus(fetch, ud, d, False)
-            else:
-                # debug for now to avoid spamming the logs in e.g. remote sstate searches
-                logger.debug2("checkstatus() urlopen failed: %s" % e)
-                return False
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib.request, in this case). In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+
+                # Be conciliant with URIs containing a space. This is mainly
+                # redundant with the more complete encoding done in http_error_302(),
+                # but it is kept for compatibility with other callers.
+                newurl = newurl.replace(' ', '%20')
+
+                CONTENT_HEADERS = ("content-length", "content-type")
+                newheaders = {k: v for k, v in req.headers.items()
+                              if k.lower() not in CONTENT_HEADERS}
+                return urllib.request.Request(newurl,
+                                              method="HEAD" if m == "HEAD" else "GET",
+                                              headers=newheaders,
+                                              origin_req_host=req.origin_req_host,
+                                              unverifiable=True)
+
+            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
+
+        # We need to update the environment here as both the proxy and HTTPS
+        # handlers need variables set. The proxy needs http_proxy and friends to
+        # be set, and HTTPSHandler ends up calling into openssl to load the
+        # certificates. In buildtools configurations this will be looking at the
+        # wrong place for certificates by default: we set SSL_CERT_FILE to the
+        # right location in the buildtools environment script but as BitBake
+        # prunes the environment this is lost. When binaries are executed
+        # runfetchcmd ensures these values are in the environment, but this is
+        # pure Python so we need to update the environment.
+        #
+        # Avoid trampling the environment too much by using bb.utils.environment
+        # to scope the changes to the build_opener request, which is when the
+        # environment lookups happen.
+        newenv = bb.fetch2.get_fetcher_environment(d)
+
+        with bb.utils.environment(**newenv):
+            import ssl
+
+            if self.check_certs(d):
+                context = ssl.create_default_context()
+            else:
+                context = ssl._create_unverified_context()
+
+            handlers = [FixedHTTPRedirectHandler,
+                        HTTPMethodFallback,
+                        urllib.request.ProxyHandler(),
+                        CacheHTTPHandler(),
+                        urllib.request.HTTPSHandler(context=context)]
+            opener = urllib.request.build_opener(*handlers)
+
+            try:
+                parts = urllib.parse.urlparse(ud.url.split(";")[0])
+                if parts.query:
+                    uri = "{}://{}{}?{}".format(parts.scheme, parts.netloc, parts.path, parts.query)
+                else:
+                    uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
+                r = urllib.request.Request(uri)
+                r.get_method = lambda: "HEAD"
+                # Some servers (FusionForge, as used on Alioth) require that the
+                # optional Accept header is set.
+                r.add_header("Accept", "*/*")
+                r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
+                def add_basic_auth(login_str, request):
+                    '''Adds Basic auth to http request, pass in login:password as string'''
+                    import base64
+                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
+                    authheader = "Basic %s" % encodeuser
+                    r.add_header("Authorization", authheader)
+
+                if ud.user and ud.pswd:
+                    add_basic_auth(ud.user + ':' + ud.pswd, r)
+
+                try:
+                    import netrc
+                    auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
+                    if auth_data:
+                        login, _, password = auth_data
+                        add_basic_auth("%s:%s" % (login, password), r)
+                except (FileNotFoundError, netrc.NetrcParseError):
+                    pass
+
+                with opener.open(r, timeout=100) as response:
+                    pass
+            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
+                if try_again:
+                    logger.debug2("checkstatus: trying again")
+                    return self.checkstatus(fetch, ud, d, False)
+                else:
+                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
+                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
+                    return False
+
         return True
 
     def _parse_path(self, regex, s):
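
bb.utils.environment() scopes the proxy and certificate variables to the build_opener() call, which is when urllib's ProxyHandler and OpenSSL actually consult the environment. A plausible sketch of such a scoping context manager (not the actual bb.utils implementation):

    import os
    from contextlib import contextmanager

    @contextmanager
    def scoped_environment(**overrides):
        saved = dict(os.environ)
        os.environ.update(overrides)
        try:
            yield
        finally:
            # Restore exactly what was there before, dropping anything added
            os.environ.clear()
            os.environ.update(saved)
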
@@ -416,7 +488,7 @@ class Wget(FetchMethod):
         f = tempfile.NamedTemporaryFile()
         with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
             fetchcmd = self.basecmd
-            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
+            fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
             try:
                 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                 fetchresult = f.read()
@@ -472,7 +544,7 @@ class Wget(FetchMethod):
         version_dir = ['', '', '']
         version = ['', '', '']
 
-        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
+        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
         s = dirver_regex.search(dirver)
         if s:
             version_dir[1] = s.group('ver')
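
Relaxing (\d+[\.\-_])+ to (\d+[\.\-_])* makes the separator-and-digits part optional, so a directory version that is a single bare number now matches too:

    import re

    dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
    for dirver in ("release-1.2.3", "release-7"):
        s = dirver_regex.search(dirver)
        print(dirver, "->", s.group('ver'))
    # release-1.2.3 -> 1.2.3
    # release-7 -> 7   (the previous '+' form found no match here)
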
@@ -548,7 +620,7 @@ class Wget(FetchMethod):
 
         # src.rpm extension was added only for rpm package. Can be removed if the rpm
         # packaged will always be considered as having to be manually upgraded
-        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
+        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
 
         # match name, version and archive type of a package
         package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
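
Folding the per-compressor alternatives into tar\.\w+ shortens the suffix list and covers newer compressors such as tar.zst without further edits:

    import re

    psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
    for suffix in ("tar.gz", "tar.bz2", "tar.zst", "tgz"):
        print(suffix, bool(re.fullmatch(psuffix_regex, suffix)))   # True for all four
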
@@ -576,13 +648,17 @@ class Wget(FetchMethod):
 
         sanity check to ensure same name and type.
         """
-        package = ud.path.split("/")[-1]
+        if 'downloadfilename' in ud.parm:
+            package = ud.parm['downloadfilename']
+        else:
+            package = ud.path.split("/")[-1]
         current_version = ['', d.getVar('PV'), '']
 
         """possible to have no version in pkg name, such as spectrum-fw"""
         if not re.search(r"\d+", package):
             current_version[1] = re.sub('_', '.', current_version[1])
             current_version[1] = re.sub('-', '.', current_version[1])
+            bb.debug(3, "latest_versionstring: no version found in %s" % package)
             return (current_version[1], '')
 
         package_regex = self._init_regexes(package, ud, d)
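
Preferring downloadfilename matters because the version regexes are built from the package file name, and some URLs carry no usable basename. A sketch with hypothetical values mirroring a URL like https://example.com/get?id=42;downloadfilename=foo-1.2.3.tar.gz:

    parm = {"downloadfilename": "foo-1.2.3.tar.gz"}   # hypothetical ud.parm
    path = "/get"                                     # hypothetical ud.path

    if 'downloadfilename' in parm:
        package = parm['downloadfilename']
    else:
        package = path.split("/")[-1]

    print(package)   # foo-1.2.3.tar.gz, rather than the versionless "get"
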
@@ -599,10 +675,10 @@ class Wget(FetchMethod):
         # search for version matches on folders inside the path, like:
         # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
         dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
-        m = dirver_regex.search(path)
+        m = dirver_regex.findall(path)
         if m:
             pn = d.getVar('PN')
-            dirver = m.group('dirver')
+            dirver = m[-1][0]
 
             dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
             if not dirver_pn_regex.search(dirver):
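
findall() with a grouped pattern returns one tuple per match, so m[-1][0] selects the dirver group of the last versioned directory in the path, where the old search().group('dirver') stopped at the first:

    import re

    dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
    path = "/sources/pkg/1.0/extras/2.4/"   # hypothetical index path
    m = dirver_regex.findall(path)
    print(m[-1][0])   # 2.4 (search().group('dirver') would have given 1.0)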