Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--  bitbake/lib/bb/fetch2/wget.py  617
1 files changed, 0 insertions, 617 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
deleted file mode 100644
index 6d82f3af07..0000000000
--- a/bitbake/lib/bb/fetch2/wget.py
+++ /dev/null
@@ -1,617 +0,0 @@
1"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004 Chris Larson
10#
11# SPDX-License-Identifier: GPL-2.0-only
12#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
15import shlex
16import re
17import tempfile
18import os
19import errno
20import bb
21import bb.progress
22import socket
23import http.client
24import urllib.request, urllib.parse, urllib.error
25from bb.fetch2 import FetchMethod
26from bb.fetch2 import FetchError
27from bb.fetch2 import logger
28from bb.fetch2 import runfetchcmd
29from bb.utils import export_proxies
30from bs4 import BeautifulSoup
31from bs4 import SoupStrainer
32
33class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
34 """
35 Extract progress information from wget output.
36 Note: relies on --progress=dot (with -v or without -q/-nv) being
37 specified on the wget command line.
38 """
39 def __init__(self, d):
40 super(WgetProgressHandler, self).__init__(d)
41 # Send an initial progress event so the bar gets shown
42 self._fire_progress(0)
43
44 def writeline(self, line):
45 percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
46 if percs:
47 progress = int(percs[-1][0])
48 rate = percs[-1][1] + '/s'
49 self.update(progress, rate)
50 return False
51 return True
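# Illustration (not part of the original file): with "--progress=dot -v" wget
# prints dot-progress lines roughly like the one below; the regex above picks
# out the last "<percent>% <rate>" pair, here progress=96 and rate "3.34M/s".
#
#    51200K .......... .......... .......... .......... ..........  96% 3.34M 0s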
52
53
54class Wget(FetchMethod):
55
56 # CDNs like CloudFlare may do a 'browser integrity test' which can fail
57 # with the standard wget/urllib User-Agent, so pretend to be a modern
58 # browser.
59 user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
60
61 """Class to fetch urls via 'wget'"""
62 def supports(self, ud, d):
63 """
64 Check to see if a given url can be fetched with wget.
65 """
66 return ud.type in ['http', 'https', 'ftp']
67
68 def recommends_checksum(self, urldata):
69 return True
70
71 def urldata_init(self, ud, d):
72 if 'protocol' in ud.parm:
73 if ud.parm['protocol'] == 'git':
74 raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
75
76 if 'downloadfilename' in ud.parm:
77 ud.basename = ud.parm['downloadfilename']
78 else:
79 ud.basename = os.path.basename(ud.path)
80
81 ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
82 if not ud.localfile:
83 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
84
85 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
86
87 def _runwget(self, ud, d, command, quiet, workdir=None):
88
89 progresshandler = WgetProgressHandler(d)
90
91 logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
92 bb.fetch2.check_network_access(d, command, ud.url)
93 runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
94
95 def download(self, ud, d):
96 """Fetch urls"""
97
98 fetchcmd = self.basecmd
99
100 if 'downloadfilename' in ud.parm:
101 localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
102 bb.utils.mkdirhier(os.path.dirname(localpath))
103 fetchcmd += " -O %s" % shlex.quote(localpath)
104
105 if ud.user and ud.pswd:
106 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
107
108 uri = ud.url.split(";")[0]
109 if os.path.exists(ud.localpath):
110 # file exists, but we didn't complete it; trying again
111 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
112 else:
113 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
114
115 self._runwget(ud, d, fetchcmd, False)
116
117 # Sanity check since wget can pretend it succeeded when it didn't
118 # Also, this used to happen if sourceforge sent us to the mirror page
119 if not os.path.exists(ud.localpath):
120 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
121
122 if os.path.getsize(ud.localpath) == 0:
123 os.remove(ud.localpath)
124 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
125
126 return True
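# Illustrative example (assumed DL_DIR and URL, not part of the original file):
# with DL_DIR=/srv/downloads and the url https://example.com/foo-1.0.tar.gz,
# the command handed to _runwget() resembles
#
#   /usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate \
#       -P /srv/downloads 'https://example.com/foo-1.0.tar.gz'
#
# and _runwget() appends " --progress=dot -v" before running it.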
127
128 def checkstatus(self, fetch, ud, d, try_again=True):
129 class HTTPConnectionCache(http.client.HTTPConnection):
130 if fetch.connection_cache:
131 def connect(self):
132 """Connect to the host and port specified in __init__."""
133
134 sock = fetch.connection_cache.get_connection(self.host, self.port)
135 if sock:
136 self.sock = sock
137 else:
138 self.sock = socket.create_connection((self.host, self.port),
139 self.timeout, self.source_address)
140 fetch.connection_cache.add_connection(self.host, self.port, self.sock)
141
142 if self._tunnel_host:
143 self._tunnel()
144
145 class CacheHTTPHandler(urllib.request.HTTPHandler):
146 def http_open(self, req):
147 return self.do_open(HTTPConnectionCache, req)
148
149 def do_open(self, http_class, req):
150 """Return an addinfourl object for the request, using http_class.
151
152 http_class must implement the HTTPConnection API from httplib.
153 The addinfourl return value is a file-like object. It also
154 has methods and attributes including:
155 - info(): return a mimetools.Message object for the headers
156 - geturl(): return the original request URL
157 - code: HTTP status code
158 """
159 host = req.host
160 if not host:
161 raise urllib.error.URLError('no host given')
162
163 h = http_class(host, timeout=req.timeout) # will parse host:port
164 h.set_debuglevel(self._debuglevel)
165
166 headers = dict(req.unredirected_hdrs)
167 headers.update(dict((k, v) for k, v in list(req.headers.items())
168 if k not in headers))
169
170 # We want to make an HTTP/1.1 request, but the addinfourl
171 # class isn't prepared to deal with a persistent connection.
172 # It will try to read all remaining data from the socket,
173 # which will block while the server waits for the next request.
174 # So make sure the connection gets closed after the (only)
175 # request.
176
177 # Don't close the connection when connection_cache is enabled.
178 if fetch.connection_cache is None:
179 headers["Connection"] = "close"
180 else:
181 headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
182
183 headers = dict(
184 (name.title(), val) for name, val in list(headers.items()))
185
186 if req._tunnel_host:
187 tunnel_headers = {}
188 proxy_auth_hdr = "Proxy-Authorization"
189 if proxy_auth_hdr in headers:
190 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
191 # Proxy-Authorization should not be sent to origin
192 # server.
193 del headers[proxy_auth_hdr]
194 h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
195
196 try:
197 h.request(req.get_method(), req.selector, req.data, headers)
198 except socket.error as err: # XXX what error?
199 # Don't close connection when cache is enabled.
200 # Instead, try to detect connections that are no longer
201 # usable (for example, closed unexpectedly) and remove
202 # them from the cache.
203 if fetch.connection_cache is None:
204 h.close()
205 elif isinstance(err, OSError) and err.errno == errno.EBADF:
206 # This happens when the server closes the connection despite the Keep-Alive.
207 # Apparently urllib then uses the file descriptor, expecting it to be
208 # connected, when in reality the connection is already gone.
209 # We let the request fail and expect it to be
210 # tried once more ("try_again" in check_status()),
211 # with the dead connection removed from the cache.
212 # If it still fails, we give up, which can happen with bad
213 # HTTP proxy settings.
214 fetch.connection_cache.remove_connection(h.host, h.port)
215 raise urllib.error.URLError(err)
216 else:
217 r = h.getresponse()
218
219 # Pick apart the HTTPResponse object to get the addinfourl
220 # object initialized properly.
221
222 # Wrap the HTTPResponse object in socket's file object adapter
223 # for Windows. That adapter calls recv(), so delegate recv()
224 # to read(). This weird wrapping allows the returned object to
225 # have readline() and readlines() methods.
226
227 # XXX It might be better to extract the read buffering code
228 # out of socket._fileobject() and into a base class.
229 r.recv = r.read
230
231 # no data, just have to read
232 r.read()
233 class fp_dummy(object):
234 def read(self):
235 return ""
236 def readline(self):
237 return ""
238 def close(self):
239 pass
240 closed = False
241
242 resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
243 resp.code = r.status
244 resp.msg = r.reason
245
246 # Close the connection when the server requests it.
247 if fetch.connection_cache is not None:
248 if 'Connection' in r.msg and r.msg['Connection'] == 'close':
249 fetch.connection_cache.remove_connection(h.host, h.port)
250
251 return resp
252
253 class HTTPMethodFallback(urllib.request.BaseHandler):
254 """
255 Fallback to GET if HEAD is not allowed (405 HTTP error)
256 """
257 def http_error_405(self, req, fp, code, msg, headers):
258 fp.read()
259 fp.close()
260
261 if req.get_method() != 'GET':
262 newheaders = dict((k, v) for k, v in list(req.headers.items())
263 if k.lower() not in ("content-length", "content-type"))
264 return self.parent.open(urllib.request.Request(req.get_full_url(),
265 headers=newheaders,
266 origin_req_host=req.origin_req_host,
267 unverifiable=True))
268
269 raise urllib.error.HTTPError(req, code, msg, headers, None)
270
271 # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
272 # Forbidden when they actually mean 405 Method Not Allowed.
273 http_error_403 = http_error_405
274
275
276 class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
277 """
278 urllib.request.HTTPRedirectHandler resets the method to GET on redirect,
279 whereas we want to follow redirects using the original method.
280 """
281 def redirect_request(self, req, fp, code, msg, headers, newurl):
282 newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
283 newreq.get_method = req.get_method
284 return newreq
285 exported_proxies = export_proxies(d)
286
287 handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
288 if exported_proxies:
289 handlers.append(urllib.request.ProxyHandler())
290 handlers.append(CacheHTTPHandler())
291 # Since Python 2.7.9 ssl cert validation is enabled by default
292 # (see PEP-0476); this causes verification errors on some https servers,
293 # so disable it by default.
294 import ssl
295 if hasattr(ssl, '_create_unverified_context'):
296 handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
297 opener = urllib.request.build_opener(*handlers)
298
299 try:
300 uri = ud.url.split(";")[0]
301 r = urllib.request.Request(uri)
302 r.get_method = lambda: "HEAD"
303 # Some servers (FusionForge, as used on Alioth) require that the
304 # optional Accept header is set.
305 r.add_header("Accept", "*/*")
306 r.add_header("User-Agent", self.user_agent)
307 def add_basic_auth(login_str, request):
308 '''Adds Basic auth to http request, pass in login:password as string'''
309 import base64
310 encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
311 authheader = "Basic %s" % encodeuser
312 r.add_header("Authorization", authheader)
313
314 if ud.user and ud.pswd:
315 add_basic_auth(ud.user + ':' + ud.pswd, r)
316
317 try:
318 import netrc
319 n = netrc.netrc()
320 login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
321 add_basic_auth("%s:%s" % (login, password), r)
322 except (TypeError, ImportError, IOError, netrc.NetrcParseError):
323 pass
324
325 with opener.open(r) as response:
326 pass
327 except urllib.error.URLError as e:
328 if try_again:
329 logger.debug2("checkstatus: trying again")
330 return self.checkstatus(fetch, ud, d, False)
331 else:
332 # debug for now to avoid spamming the logs in e.g. remote sstate searches
333 logger.debug2("checkstatus() urlopen failed: %s" % e)
334 return False
335 except ConnectionResetError as e:
336 if try_again:
337 logger.debug2("checkstatus: trying again")
338 return self.checkstatus(fetch, ud, d, False)
339 else:
340 # debug for now to avoid spamming the logs in e.g. remote sstate searches
341 logger.debug2("checkstatus() urlopen failed: %s" % e)
342 return False
343 return True
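# Minimal standalone sketch of the HEAD probe performed above (assumed URL, no
# connection cache, proxy or auth handling; not part of the original file):
#
#   import urllib.request
#   req = urllib.request.Request("https://example.com/foo-1.0.tar.gz")
#   req.get_method = lambda: "HEAD"
#   req.add_header("Accept", "*/*")
#   req.add_header("User-Agent", Wget.user_agent)
#   with urllib.request.urlopen(req) as resp:
#       print(resp.status)  # 200 means the upstream file is reachable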
344
345 def _parse_path(self, regex, s):
346 """
347 Find and group name, version and archive type in the given string s
348 """
349
350 m = regex.search(s)
351 if m:
352 pname = ''
353 pver = ''
354 ptype = ''
355
356 mdict = m.groupdict()
357 if 'name' in mdict.keys():
358 pname = mdict['name']
359 if 'pver' in mdict.keys():
360 pver = mdict['pver']
361 if 'type' in mdict.keys():
362 ptype = mdict['type']
363
364 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
365
366 return (pname, pver, ptype)
367
368 return None
369
370 def _modelate_version(self, version):
371 if version[0] in ['.', '-']:
372 if version[1].isdigit():
373 version = version[1] + version[0] + version[2:len(version)]
374 else:
375 version = version[1:len(version)]
376
377 version = re.sub('-', '.', version)
378 version = re.sub('_', '.', version)
379 version = re.sub('(rc)+', '.1000.', version)
380 version = re.sub('(beta)+', '.100.', version)
381 version = re.sub('(alpha)+', '.10.', version)
382 if version[0] == 'v':
383 version = version[1:len(version)]
384 return version
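# Worked examples of the mangling above (illustrative, not part of the original
# file): "v2.5rc1" -> "2.5.1000.1" and "1.0beta3" -> "1.0.100.3", so that
# bb.utils.vercmp() can compare versions numerically, with rc (1000) ranking
# above beta (100) and alpha (10) for the same base version.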
385
386 def _vercmp(self, old, new):
387 """
388 Check whether the 'new' version is newer than the 'old' version. We use the existing vercmp() for this
389 purpose. PE is cleared in the comparison as it's not relevant to the build, and PR is cleared too
390 for simplicity, as it's somewhat difficult to derive from the various upstream formats
391 """
392
393 (oldpn, oldpv, oldsuffix) = old
394 (newpn, newpv, newsuffix) = new
395
396 # Check for a new suffix type that we have never heard of before
397 if newsuffix:
398 m = self.suffix_regex_comp.search(newsuffix)
399 if not m:
400 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
401 return False
402
403 # Not our package so ignore it
404 if oldpn != newpn:
405 return False
406
407 oldpv = self._modelate_version(oldpv)
408 newpv = self._modelate_version(newpv)
409
410 return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
411
412 def _fetch_index(self, uri, ud, d):
413 """
414 Run fetch checkstatus to get directory information
415 """
417 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
418 fetchcmd = self.basecmd
419 fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
420 try:
421 self._runwget(ud, d, fetchcmd, True, workdir=workdir)
422 fetchresult = f.read()
423 except bb.fetch2.BBFetchException:
424 fetchresult = ""
425
426 return fetchresult
427
428 def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
429 """
430 Return the latest version of a package inside a given directory path
431 If error or no version, return ""
432 """
433 valid = 0
434 version = ['', '', '']
435
436 bb.debug(3, "VersionURL: %s" % (url))
437 soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
438 if not soup:
439 bb.debug(3, "*** %s NO SOUP" % (url))
440 return ""
441
442 for line in soup.find_all('a', href=True):
443 bb.debug(3, "line['href'] = '%s'" % (line['href']))
444 bb.debug(3, "line = '%s'" % (str(line)))
445
446 newver = self._parse_path(package_regex, line['href'])
447 if not newver:
448 newver = self._parse_path(package_regex, str(line))
449
450 if newver:
451 bb.debug(3, "Upstream version found: %s" % newver[1])
452 if valid == 0:
453 version = newver
454 valid = 1
455 elif self._vercmp(version, newver) < 0:
456 version = newver
457
458 pupver = re.sub('_', '.', version[1])
459
460 bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
461 (package, pupver or "N/A", current_version[1]))
462
463 if valid:
464 return pupver
465
466 return ""
467
468 def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
469 """
470 Scan every directory in order to get upstream version.
471 """
472 version_dir = ['', '', '']
473 version = ['', '', '']
474
475 dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
476 s = dirver_regex.search(dirver)
477 if s:
478 version_dir[1] = s.group('ver')
479 else:
480 version_dir[1] = dirver
481
482 dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
483 ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
484 bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
485
486 soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
487 if not soup:
488 return version[1]
489
490 for line in soup.find_all('a', href=True):
491 s = dirver_regex.search(line['href'].strip("/"))
492 if s:
493 sver = s.group('ver')
494
495 # When the prefix is part of the version directory, make sure that
496 # only the version directory is used, so strip any preceding
497 # directories if they exist.
498 #
499 # Example: with pfx = '/dir1/dir2/v' and version = '2.5', the expected
500 # result is v2.5.
501 spfx = s.group('pfx').split('/')[-1]
502
503 version_dir_new = ['', sver, '']
504 if self._vercmp(version_dir, version_dir_new) <= 0:
505 dirver_new = spfx + sver
506 path = ud.path.replace(dirver, dirver_new, True) \
507 .split(package)[0]
508 uri = bb.fetch.encodeurl([ud.type, ud.host, path,
509 ud.user, ud.pswd, {}])
510
511 pupver = self._check_latest_version(uri,
512 package, package_regex, current_version, ud, d)
513 if pupver:
514 version[1] = pupver
515
516 version_dir = version_dir_new
517
518 return version[1]
519
520 def _init_regexes(self, package, ud, d):
521 """
522 Match as many patterns as possible such as:
523 gnome-common-2.20.0.tar.gz (most common format)
524 gtk+-2.90.1.tar.gz
525 xf86-input-synaptics-12.6.9.tar.gz
526 dri2proto-2.3.tar.gz
527 blktool_4.orig.tar.gz
528 libid3tag-0.15.1b.tar.gz
529 unzip552.tar.gz
530 icu4c-3_6-src.tgz
531 genext2fs_1.3.orig.tar.gz
532 gst-fluendo-mp3
533 """
534 # match most patterns which use "-" as the separator before the version digits
535 pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
536 # a loose pattern such as for unzip552.tar.gz
537 pn_prefix2 = r"[a-zA-Z]+"
538 # a loose pattern such as for 80325-quicky-0.4.tar.gz
539 pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
540 # Save the Package Name (pn) Regex for use later
541 pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
542
543 # match version
544 pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
545
546 # match arch
547 parch_regex = "-source|_all_"
548
549 # The src.rpm extension was added only for the rpm package. It can be removed if rpm
550 # packages will always be considered as having to be manually upgraded
551 psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
552
553 # match name, version and archive type of a package
554 package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
555 % (pn_regex, pver_regex, parch_regex, psuffix_regex))
556 self.suffix_regex_comp = re.compile(psuffix_regex)
557
558 # compile the regex; it can be package-specific (UPSTREAM_CHECK_REGEX) or the generic one
559 pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
560 if pn_regex:
561 package_custom_regex_comp = re.compile(pn_regex)
562 else:
563 version = self._parse_path(package_regex_comp, package)
564 if version:
565 package_custom_regex_comp = re.compile(
566 r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
567 (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
568 else:
569 package_custom_regex_comp = None
570
571 return package_custom_regex_comp
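# Illustrative match (assumed filename, not part of the original file): for
# "gnome-common-2.20.0.tar.gz" the generic regex yields name "gnome-common-",
# pver "2.20.0" and type "tar.gz", which _parse_path() returns as a
# (name, version, type) tuple.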
572
573 def latest_versionstring(self, ud, d):
574 """
575 Manipulate the URL and try to obtain the latest package version
576
577 Sanity checks ensure the same name and type.
578 """
579 package = ud.path.split("/")[-1]
580 current_version = ['', d.getVar('PV'), '']
581
582 """possible to have no version in pkg name, such as spectrum-fw"""
583 if not re.search(r"\d+", package):
584 current_version[1] = re.sub('_', '.', current_version[1])
585 current_version[1] = re.sub('-', '.', current_version[1])
586 return (current_version[1], '')
587
588 package_regex = self._init_regexes(package, ud, d)
589 if package_regex is None:
590 bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
591 return ('', '')
592 bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
593
594 uri = ""
595 regex_uri = d.getVar("UPSTREAM_CHECK_URI")
596 if not regex_uri:
597 path = ud.path.split(package)[0]
598
599 # search for version matches on folders inside the path, like:
600 # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
601 dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
602 m = dirver_regex.search(path)
603 if m:
604 pn = d.getVar('PN')
605 dirver = m.group('dirver')
606
607 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
608 if not dirver_pn_regex.search(dirver):
609 return (self._check_latest_version_by_dir(dirver,
610 package, package_regex, current_version, ud, d), '')
611
612 uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
613 else:
614 uri = regex_uri
615
616 return (self._check_latest_version(uri, package, package_regex,
617 current_version, ud, d), '')
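# Example SRC_URI entries this fetcher would handle (illustrative values, not
# part of the original file):
#
#   SRC_URI = "https://example.com/releases/foo-1.2.3.tar.gz"
#   SRC_URI = "http://example.com/dl.php?id=42;downloadfilename=bar-2.0.tar.gz"
#
# The downloadfilename parameter maps to ud.parm['downloadfilename'] in
# urldata_init() and overrides the basename used for the local file.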