From 9d19dd9bd709dca655b1eac13deaa49e2b54cd88 Mon Sep 17 00:00:00 2001 From: Alexander Kanavin Date: Fri, 4 Dec 2015 13:00:20 +0200 Subject: bitbake: wget.py: parse only <a> tags For two reasons: 1) The important one: we hit the following bug when doing upstream version checks on some webpages: https://bugs.launchpad.net/beautifulsoup/+bug/1471755 2) Also, documentation for beautifulsoup states that memory usage and speed is improved that way. (Bitbake rev: 7546d4aeb3ba8fda9832081b84d93138dc5e58d6) Signed-off-by: Alexander Kanavin Signed-off-by: Richard Purdie --- bitbake/lib/bb/fetch2/wget.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'bitbake/lib/bb/fetch2/wget.py') diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py index bd2a8972a7..c185f5b5f4 100644 --- a/bitbake/lib/bb/fetch2/wget.py +++ b/bitbake/lib/bb/fetch2/wget.py @@ -38,6 +38,7 @@ from bb.fetch2 import FetchError from bb.fetch2 import logger from bb.fetch2 import runfetchcmd from bs4 import BeautifulSoup +from bs4 import SoupStrainer class Wget(FetchMethod): """Class to fetch urls via 'wget'""" @@ -367,7 +368,7 @@ class Wget(FetchMethod): version = ['', '', ''] bb.debug(3, "VersionURL: %s" % (url)) - soup = BeautifulSoup(self._fetch_index(url, ud, d)) + soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a")) if not soup: bb.debug(3, "*** %s NO SOUP" % (url)) return "" @@ -417,7 +418,7 @@ class Wget(FetchMethod): ud.path.split(dirver)[0], ud.user, ud.pswd, {}]) bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package)) - soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d)) + soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a")) if not soup: return version[1] -- cgit v1.2.3-54-g00ecf