summary refs log tree commit diff stats
path: root/bitbake/lib/bb/fetch2/wget.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--bitbake/lib/bb/fetch2/wget.py263
1 file changed, 168 insertions, 95 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 6d82f3af07..7e43d3bc97 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -26,7 +26,6 @@ from bb.fetch2 import FetchMethod
26from bb.fetch2 import FetchError 26from bb.fetch2 import FetchError
27from bb.fetch2 import logger 27from bb.fetch2 import logger
28from bb.fetch2 import runfetchcmd 28from bb.fetch2 import runfetchcmd
29from bb.utils import export_proxies
30from bs4 import BeautifulSoup 29from bs4 import BeautifulSoup
31from bs4 import SoupStrainer 30from bs4 import SoupStrainer
32 31
@@ -52,18 +51,19 @@ class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
52 51
53 52
54class Wget(FetchMethod): 53class Wget(FetchMethod):
54 """Class to fetch urls via 'wget'"""
55 55
56 # CDNs like CloudFlare may do a 'browser integrity test' which can fail 56 def check_certs(self, d):
57 # with the standard wget/urllib User-Agent, so pretend to be a modern 57 """
58 # browser. 58 Should certificates be checked?
59 user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" 59 """
60 return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
60 61
61 """Class to fetch urls via 'wget'"""
62 def supports(self, ud, d): 62 def supports(self, ud, d):
63 """ 63 """
64 Check to see if a given url can be fetched with wget. 64 Check to see if a given url can be fetched with wget.
65 """ 65 """
66 return ud.type in ['http', 'https', 'ftp'] 66 return ud.type in ['http', 'https', 'ftp', 'ftps']
67 67
68 def recommends_checksum(self, urldata): 68 def recommends_checksum(self, urldata):
69 return True 69 return True
@@ -78,11 +78,17 @@ class Wget(FetchMethod):
78 else: 78 else:
79 ud.basename = os.path.basename(ud.path) 79 ud.basename = os.path.basename(ud.path)
80 80
81 ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) 81 ud.localfile = ud.basename
82 if not ud.localfile: 82 if not ud.localfile:
83 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) 83 ud.localfile = ud.host + ud.path.replace("/", ".")
84 84
85 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate" 85 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"
86
87 if ud.type == 'ftp' or ud.type == 'ftps':
88 self.basecmd += " --passive-ftp"
89
90 if not self.check_certs(d):
91 self.basecmd += " --no-check-certificate"
86 92
87 def _runwget(self, ud, d, command, quiet, workdir=None): 93 def _runwget(self, ud, d, command, quiet, workdir=None):
88 94
@@ -90,39 +96,53 @@ class Wget(FetchMethod):
90 96
91 logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) 97 logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
92 bb.fetch2.check_network_access(d, command, ud.url) 98 bb.fetch2.check_network_access(d, command, ud.url)
93 runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) 99 runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)
94 100
95 def download(self, ud, d): 101 def download(self, ud, d):
96 """Fetch urls""" 102 """Fetch urls"""
97 103
98 fetchcmd = self.basecmd 104 fetchcmd = self.basecmd
99 105
100 if 'downloadfilename' in ud.parm: 106 dldir = os.path.realpath(d.getVar("DL_DIR"))
101 localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) 107 localpath = os.path.join(dldir, ud.localfile) + ".tmp"
102 bb.utils.mkdirhier(os.path.dirname(localpath)) 108 bb.utils.mkdirhier(os.path.dirname(localpath))
103 fetchcmd += " -O %s" % shlex.quote(localpath) 109 fetchcmd += " --output-document=%s" % shlex.quote(localpath)
104 110
105 if ud.user and ud.pswd: 111 if ud.user and ud.pswd:
106 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) 112 fetchcmd += " --auth-no-challenge"
113 if ud.parm.get("redirectauth", "1") == "1":
114 # An undocumented feature of wget is that if the
115 # username/password are specified on the URI, wget will only
116 # send the Authorization header to the first host and not to
117 # any hosts that it is redirected to. With the increasing
118 # usage of temporary AWS URLs, this difference now matters as
119 # AWS will reject any request that has authentication both in
120 # the query parameters (from the redirect) and in the
121 # Authorization header.
122 fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
107 123
108 uri = ud.url.split(";")[0] 124 uri = ud.url.split(";")[0]
109 if os.path.exists(ud.localpath): 125 fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
110 # file exists, but we didnt complete it.. trying again..
111 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
112 else:
113 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
114
115 self._runwget(ud, d, fetchcmd, False) 126 self._runwget(ud, d, fetchcmd, False)
116 127
117 # Sanity check since wget can pretend it succeed when it didn't 128 # Sanity check since wget can pretend it succeed when it didn't
118 # Also, this used to happen if sourceforge sent us to the mirror page 129 # Also, this used to happen if sourceforge sent us to the mirror page
119 if not os.path.exists(ud.localpath): 130 if not os.path.exists(localpath):
120 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) 131 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)
121 132
122 if os.path.getsize(ud.localpath) == 0: 133 if os.path.getsize(localpath) == 0:
123 os.remove(ud.localpath) 134 os.remove(localpath)
124 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) 135 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
125 136
137 # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
138 # original file, which might be a race (imagine two recipes referencing the same
139 # source, one with an incorrect checksum)
140 bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
141
142 # Remove the ".tmp" and move the file into position atomically
143 # Our lock prevents multiple writers but mirroring code may grab incomplete files
144 os.rename(localpath, localpath[:-4])
145
126 return True 146 return True
127 147
128 def checkstatus(self, fetch, ud, d, try_again=True): 148 def checkstatus(self, fetch, ud, d, try_again=True):
@@ -209,12 +229,17 @@ class Wget(FetchMethod):
209 # We let the request fail and expect it to be 229 # We let the request fail and expect it to be
210 # tried once more ("try_again" in check_status()), 230 # tried once more ("try_again" in check_status()),
211 # with the dead connection removed from the cache. 231 # with the dead connection removed from the cache.
212 # If it still fails, we give up, which can happend for bad 232 # If it still fails, we give up, which can happen for bad
213 # HTTP proxy settings. 233 # HTTP proxy settings.
214 fetch.connection_cache.remove_connection(h.host, h.port) 234 fetch.connection_cache.remove_connection(h.host, h.port)
215 raise urllib.error.URLError(err) 235 raise urllib.error.URLError(err)
216 else: 236 else:
217 r = h.getresponse() 237 try:
238 r = h.getresponse()
239 except TimeoutError as e:
240 if fetch.connection_cache:
241 fetch.connection_cache.remove_connection(h.host, h.port)
242 raise TimeoutError(e)
218 243
219 # Pick apart the HTTPResponse object to get the addinfourl 244 # Pick apart the HTTPResponse object to get the addinfourl
220 # object initialized properly. 245 # object initialized properly.
@@ -275,71 +300,115 @@ class Wget(FetchMethod):
275 300
276 class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler): 301 class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
277 """ 302 """
278 urllib2.HTTPRedirectHandler resets the method to GET on redirect, 303 urllib2.HTTPRedirectHandler before 3.13 has two flaws:
279 when we want to follow redirects using the original method. 304
305 It resets the method to GET on redirect when we want to follow
306 redirects using the original method (typically HEAD). This was fixed
307 in 759e8e7.
308
309 It also doesn't handle 308 (Permanent Redirect). This was fixed in
310 c379bc5.
311
312 Until we depend on Python 3.13 onwards, copy the redirect_request
313 method to fix these issues.
280 """ 314 """
281 def redirect_request(self, req, fp, code, msg, headers, newurl): 315 def redirect_request(self, req, fp, code, msg, headers, newurl):
282 newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) 316 m = req.get_method()
283 newreq.get_method = req.get_method 317 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
284 return newreq 318 or code in (301, 302, 303) and m == "POST")):
285 exported_proxies = export_proxies(d) 319 raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
286 320
287 handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback] 321 # Strictly (according to RFC 2616), 301 or 302 in response to
288 if exported_proxies: 322 # a POST MUST NOT cause a redirection without confirmation
289 handlers.append(urllib.request.ProxyHandler()) 323 # from the user (of urllib.request, in this case). In practice,
290 handlers.append(CacheHTTPHandler()) 324 # essentially all clients do redirect in this case, so we do
291 # Since Python 2.7.9 ssl cert validation is enabled by default 325 # the same.
292 # see PEP-0476, this causes verification errors on some https servers 326
293 # so disable by default. 327 # Be conciliant with URIs containing a space. This is mainly
294 import ssl 328 # redundant with the more complete encoding done in http_error_302(),
295 if hasattr(ssl, '_create_unverified_context'): 329 # but it is kept for compatibility with other callers.
296 handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context())) 330 newurl = newurl.replace(' ', '%20')
297 opener = urllib.request.build_opener(*handlers) 331
298 332 CONTENT_HEADERS = ("content-length", "content-type")
299 try: 333 newheaders = {k: v for k, v in req.headers.items()
300 uri = ud.url.split(";")[0] 334 if k.lower() not in CONTENT_HEADERS}
301 r = urllib.request.Request(uri) 335 return urllib.request.Request(newurl,
302 r.get_method = lambda: "HEAD" 336 method="HEAD" if m == "HEAD" else "GET",
303 # Some servers (FusionForge, as used on Alioth) require that the 337 headers=newheaders,
304 # optional Accept header is set. 338 origin_req_host=req.origin_req_host,
305 r.add_header("Accept", "*/*") 339 unverifiable=True)
306 r.add_header("User-Agent", self.user_agent) 340
307 def add_basic_auth(login_str, request): 341 http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
308 '''Adds Basic auth to http request, pass in login:password as string''' 342
309 import base64 343 # We need to update the environment here as both the proxy and HTTPS
310 encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") 344 # handlers need variables set. The proxy needs http_proxy and friends to
311 authheader = "Basic %s" % encodeuser 345 # be set, and HTTPSHandler ends up calling into openssl to load the
312 r.add_header("Authorization", authheader) 346 # certificates. In buildtools configurations this will be looking at the
313 347 # wrong place for certificates by default: we set SSL_CERT_FILE to the
314 if ud.user and ud.pswd: 348 # right location in the buildtools environment script but as BitBake
315 add_basic_auth(ud.user + ':' + ud.pswd, r) 349 # prunes the environment this is lost. When binaries are executed
350 # runfetchcmd ensures these values are in the environment, but this is
351 # pure Python so we need to update the environment.
352 #
353 # Avoid tramping the environment too much by using bb.utils.environment
354 # to scope the changes to the build_opener request, which is when the
355 # environment lookups happen.
356 newenv = bb.fetch2.get_fetcher_environment(d)
357
358 with bb.utils.environment(**newenv):
359 import ssl
360
361 if self.check_certs(d):
362 context = ssl.create_default_context()
363 else:
364 context = ssl._create_unverified_context()
365
366 handlers = [FixedHTTPRedirectHandler,
367 HTTPMethodFallback,
368 urllib.request.ProxyHandler(),
369 CacheHTTPHandler(),
370 urllib.request.HTTPSHandler(context=context)]
371 opener = urllib.request.build_opener(*handlers)
316 372
317 try: 373 try:
318 import netrc 374 parts = urllib.parse.urlparse(ud.url.split(";")[0])
319 n = netrc.netrc() 375 uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
320 login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) 376 r = urllib.request.Request(uri)
321 add_basic_auth("%s:%s" % (login, password), r) 377 r.get_method = lambda: "HEAD"
322 except (TypeError, ImportError, IOError, netrc.NetrcParseError): 378 # Some servers (FusionForge, as used on Alioth) require that the
323 pass 379 # optional Accept header is set.
324 380 r.add_header("Accept", "*/*")
325 with opener.open(r) as response: 381 r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
326 pass 382 def add_basic_auth(login_str, request):
327 except urllib.error.URLError as e: 383 '''Adds Basic auth to http request, pass in login:password as string'''
328 if try_again: 384 import base64
329 logger.debug2("checkstatus: trying again") 385 encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
330 return self.checkstatus(fetch, ud, d, False) 386 authheader = "Basic %s" % encodeuser
331 else: 387 r.add_header("Authorization", authheader)
332 # debug for now to avoid spamming the logs in e.g. remote sstate searches 388
333 logger.debug2("checkstatus() urlopen failed: %s" % e) 389 if ud.user and ud.pswd:
334 return False 390 add_basic_auth(ud.user + ':' + ud.pswd, r)
335 except ConnectionResetError as e: 391
336 if try_again: 392 try:
337 logger.debug2("checkstatus: trying again") 393 import netrc
338 return self.checkstatus(fetch, ud, d, False) 394 auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
339 else: 395 if auth_data:
340 # debug for now to avoid spamming the logs in e.g. remote sstate searches 396 login, _, password = auth_data
341 logger.debug2("checkstatus() urlopen failed: %s" % e) 397 add_basic_auth("%s:%s" % (login, password), r)
342 return False 398 except (FileNotFoundError, netrc.NetrcParseError):
399 pass
400
401 with opener.open(r, timeout=100) as response:
402 pass
403 except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
404 if try_again:
405 logger.debug2("checkstatus: trying again")
406 return self.checkstatus(fetch, ud, d, False)
407 else:
408 # debug for now to avoid spamming the logs in e.g. remote sstate searches
409 logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e))
410 return False
411
343 return True 412 return True
344 413
345 def _parse_path(self, regex, s): 414 def _parse_path(self, regex, s):
@@ -416,7 +485,7 @@ class Wget(FetchMethod):
416 f = tempfile.NamedTemporaryFile() 485 f = tempfile.NamedTemporaryFile()
417 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: 486 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
418 fetchcmd = self.basecmd 487 fetchcmd = self.basecmd
419 fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" 488 fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
420 try: 489 try:
421 self._runwget(ud, d, fetchcmd, True, workdir=workdir) 490 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
422 fetchresult = f.read() 491 fetchresult = f.read()
@@ -472,7 +541,7 @@ class Wget(FetchMethod):
472 version_dir = ['', '', ''] 541 version_dir = ['', '', '']
473 version = ['', '', ''] 542 version = ['', '', '']
474 543
475 dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))") 544 dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
476 s = dirver_regex.search(dirver) 545 s = dirver_regex.search(dirver)
477 if s: 546 if s:
478 version_dir[1] = s.group('ver') 547 version_dir[1] = s.group('ver')
@@ -548,7 +617,7 @@ class Wget(FetchMethod):
548 617
549 # src.rpm extension was added only for rpm package. Can be removed if the rpm 618 # src.rpm extension was added only for rpm package. Can be removed if the rpm
550 # packaged will always be considered as having to be manually upgraded 619 # packaged will always be considered as having to be manually upgraded
551 psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)" 620 psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
552 621
553 # match name, version and archive type of a package 622 # match name, version and archive type of a package
554 package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" 623 package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
@@ -576,13 +645,17 @@ class Wget(FetchMethod):
576 645
577 sanity check to ensure same name and type. 646 sanity check to ensure same name and type.
578 """ 647 """
579 package = ud.path.split("/")[-1] 648 if 'downloadfilename' in ud.parm:
649 package = ud.parm['downloadfilename']
650 else:
651 package = ud.path.split("/")[-1]
580 current_version = ['', d.getVar('PV'), ''] 652 current_version = ['', d.getVar('PV'), '']
581 653
582 """possible to have no version in pkg name, such as spectrum-fw""" 654 """possible to have no version in pkg name, such as spectrum-fw"""
583 if not re.search(r"\d+", package): 655 if not re.search(r"\d+", package):
584 current_version[1] = re.sub('_', '.', current_version[1]) 656 current_version[1] = re.sub('_', '.', current_version[1])
585 current_version[1] = re.sub('-', '.', current_version[1]) 657 current_version[1] = re.sub('-', '.', current_version[1])
658 bb.debug(3, "latest_versionstring: no version found in %s" % package)
586 return (current_version[1], '') 659 return (current_version[1], '')
587 660
588 package_regex = self._init_regexes(package, ud, d) 661 package_regex = self._init_regexes(package, ud, d)
@@ -599,10 +672,10 @@ class Wget(FetchMethod):
599 # search for version matches on folders inside the path, like: 672 # search for version matches on folders inside the path, like:
600 # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz 673 # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
601 dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") 674 dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
602 m = dirver_regex.search(path) 675 m = dirver_regex.findall(path)
603 if m: 676 if m:
604 pn = d.getVar('PN') 677 pn = d.getVar('PN')
605 dirver = m.group('dirver') 678 dirver = m[-1][0]
606 679
607 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) 680 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
608 if not dirver_pn_regex.search(dirver): 681 if not dirver_pn_regex.search(dirver):