summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bb/fetch2/wget.py
diff options
context:
space:
mode:
authorAníbal Limón <anibal.limon@linux.intel.com>2014-11-05 12:10:29 -0600
committerRichard Purdie <richard.purdie@linuxfoundation.org>2014-11-12 15:25:17 +0000
commit6bb241a278135e760d50380fd3f7b0ff52414328 (patch)
tree44a501e079b5f64d7b7e0244428218d67fff8e65 /bitbake/lib/bb/fetch2/wget.py
parent7587877e5d683a30c2bd5e1ac1c4e327fac1ee1c (diff)
downloadpoky-6bb241a278135e760d50380fd3f7b0ff52414328.tar.gz
bitbake: fetch/wget: Add latest_versionstring method
Being able to query whether updated versions of a url are available is useful, not least for the package reporting system. Since such code is closely linked to the url type and the url itself, the fetcher makes a logical place to contain this code. For wget based urls this means taking upstream directory listings and searching those for later versions, returning those that are found. The patch also adds unittests for this function so that if improvements are made, the original test urls can be used to evaluate those changes. This is based on code from Irina Patru <irina.patru@intel.com>. (Bitbake rev: a8272e22b7819e0e8afd8e291d276f5f28fc0007) Signed-off-by: Aníbal Limón <anibal.limon@linux.intel.com> Signed-off-by: Saul Wold <sgw@linux.intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bb/fetch2/wget.py')
-rw-r--r--bitbake/lib/bb/fetch2/wget.py239
1 files changed, 239 insertions, 0 deletions
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 0456490368..db5f27b6f1 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -25,6 +25,9 @@ BitBake build tools.
25# 25#
26# Based on functions from the base bb module, Copyright 2003 Holger Schurig 26# Based on functions from the base bb module, Copyright 2003 Holger Schurig
27 27
28import re
29import tempfile
30import subprocess
28import os 31import os
29import logging 32import logging
30import bb 33import bb
@@ -34,6 +37,7 @@ from bb.fetch2 import FetchMethod
34from bb.fetch2 import FetchError 37from bb.fetch2 import FetchError
35from bb.fetch2 import logger 38from bb.fetch2 import logger
36from bb.fetch2 import runfetchcmd 39from bb.fetch2 import runfetchcmd
40from bs4 import BeautifulSoup
37 41
38class Wget(FetchMethod): 42class Wget(FetchMethod):
39 """Class to fetch urls via 'wget'""" 43 """Class to fetch urls via 'wget'"""
@@ -104,3 +108,238 @@ class Wget(FetchMethod):
104 self._runwget(ud, d, fetchcmd, True) 108 self._runwget(ud, d, fetchcmd, True)
105 109
106 return True 110 return True
111
112
def _parse_path(self, regex, s):
    """
    Search the string s with the compiled pattern regex and extract the
    package name, version and archive type from it.

    regex must define the named groups 'name', 'ver' and 'type'.
    Returns a (name, ver, type) tuple on a match, otherwise None.
    """
    bb.debug(3, "parse_path(%s, %s)" % (regex.pattern, s))
    match = regex.search(s)
    if not match:
        return None

    parts = (match.group('name'), match.group('ver'), match.group('type'))
    bb.debug(3, "%s, %s, %s" % parts)
    return parts
123
124 def _modelate_version(self, version):
125 if version[0] in ['.', '-']:
126 if version[1].isdigit():
127 version = version[1] + version[0] + version[2:len(version)]
128 else:
129 version = version[1:len(version)]
130
131 version = re.sub('\-', '.', version)
132 version = re.sub('_', '.', version)
133 version = re.sub('(rc)+', '.-1.', version)
134 version = re.sub('(alpha)+', '.-3.', version)
135 version = re.sub('(beta)+', '.-2.', version)
136 if version[0] == 'v':
137 version = version[1:len(version)]
138 return version
139
140 def _vercmp(self, old, new):
141 """
142 Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
143 purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
144 for simplicity as it's somehow difficult to get from various upstream format
145 """
146
147 (oldpn, oldpv, oldsuffix) = old
148 (newpn, newpv, newsuffix) = new
149
150 """
151 Check for a new suffix type that we have never heard of before
152 """
153 if (newsuffix):
154 m = self.suffixregex.search(newsuffix)
155 if not m:
156 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
157 return False
158
159 """
160 Not our package so ignore it
161 """
162 if oldpn != newpn:
163 return False
164
165 oldpv = self._modelate_version(oldpv)
166 newpv = self._modelate_version(newpv)
167
168 if bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, "")) < 0:
169 return True
170 else:
171 return False
172
def _fetch_index(self, uri, ud, d):
    """
    Download the page at uri with wget and return its contents.

    The page is written to a temporary file through wget's -O option and
    read back from there. The file is created in the default temporary
    directory and is removed as soon as the 'with' block is left, so no
    index files are left behind.

    Returns whatever was read from the file, or "" when the fetch raised
    bb.fetch2.BBFetchException.
    """
    # Pretend to be a browser for the index request
    agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"

    with tempfile.NamedTemporaryFile() as f:
        fetchcmd = self.basecmd
        # NOTE(review): uri is only wrapped in single quotes here; a uri
        # containing a quote would break out of the shell word.
        fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
        try:
            self._runwget(ud, d, fetchcmd, True)
            fetchresult = f.read()
        except bb.fetch2.BBFetchException:
            fetchresult = ""

    return fetchresult
191
def _check_latest_dir(self, url, versionstring, ud, d):
    """
    Return the name of the directory with the greatest package version.

    url is the listing to scan and versionstring the name of the version
    directory currently in use. Every link in the listing is searched for
    a dotted/dashed version number and the greatest one, as judged by
    _vercmp(), is kept. The non-digit prefix found in versionstring is put
    back in front of the result.

    If error or no version, return "": the listing is only trusted when at
    least one link contains versionstring itself.
    """
    bb.debug(3, "DirURL: %s, %s" % (url, versionstring))
    soup = BeautifulSoup(self._fetch_index(url, ud, d))
    if not soup:
        return ""

    valid = 0
    prefix = ''
    # group(1): non-digit prefix, group(2): version made of at least two
    # numeric fields separated by '.', '-' or '_'
    regex = re.compile(r"(\D*)((\d+[\.\-_])+(\d+))")
    m = regex.search(versionstring)
    if m:
        version = ('', m.group(2), '')
        prefix = m.group(1)
        bb.debug(3, "version: %s, prefix: %s" % (version, prefix))
    else:
        version = ('', versionstring, '')

    for href in soup.find_all('a', href=True):
        bb.debug(3, "href: %s" % (href['href']))
        if href['href'].find(versionstring) >= 0:
            valid = 1
        m = regex.search(href['href'].strip("/"))
        if not m:
            # no version in this link, nothing to compare
            continue
        thisversion = ('', m.group(2), '')
        if self._vercmp(version, thisversion):
            version = thisversion

    if valid:
        bb.debug(3, "Would return %s" % (prefix+version[1]))
        return prefix+version[1]
    else:
        bb.debug(3, "Not Valid")
        return ""
229
def _check_latest_version(self, url, packagename, ud, d):
    """
    Return the latest version of a package inside a given directory path.

    packagename is the file name currently in use; it is parsed with
    self.name_version_type_regex to get the version to beat. Each link in
    the listing at url is then turned into a (name, version, suffix)
    tuple, either through the expression in the REGEX variable (which must
    define the groups 'name' and 'pver') or, when REGEX is unset, through
    the same name/version/type regex, and compared with _vercmp().

    Returns the winning version with '_' replaced by '.', or None when
    packagename cannot be parsed or no usable link was found.
    """
    valid = 0
    version = self._parse_path(self.name_version_type_regex, packagename)
    if not version:
        # Without a parsed current version there is nothing to compare
        # against; unpacking None further down would raise TypeError.
        bb.debug(3, "*** %s could not be parsed" % (packagename))
        return None

    bb.debug(3, "VersionURL: %s" % (url))
    soup = BeautifulSoup(self._fetch_index(url, ud, d))
    if not soup:
        bb.debug(3, "*** %s NO SOUP" % (packagename))
        return ""

    pn_regex = d.getVar('REGEX', True)
    if pn_regex:
        # remember the starting version, it is only reported in the debug
        # output below
        testversion = version
        pn_regex = re.compile(pn_regex)
        bb.debug(3, "pn_regex = '%s'" % (pn_regex.pattern))
    else:
        testversion = ('', '', '')

    for line in soup.find_all('a', href=True):
        bb.debug(3, "line = '%s'" % (line['href']))
        if pn_regex:
            m = pn_regex.search(line['href'])
            if not m:
                continue
            bb.debug(3, "Name = '%s', Pver = '%s'" % (m.group('name'), m.group('pver')))
            newver = (m.group('name'), m.group('pver'), '')
        else:
            newver = self._parse_path(self.name_version_type_regex, line['href'])
        valid = 1
        if newver and self._vercmp(version, newver):
            version = newver

    # check whether a valid package and version were found
    if not valid:
        version = ('', '', '')
    bb.debug(3, "*** %s -> %s (TestVersion = %s)" % (packagename, version[1], testversion[1]))
    if valid:
        return re.sub('_', '.', version[1])
    return None
275
def latest_versionstring(self, ud, d):
    """
    Manipulate the URL and try to obtain the latest package version

    sanity check to ensure same name and type. Match as many patterns as possible
    such as:
            gnome-common-2.20.0.tar.gz (most common format)
            gtk+-2.90.1.tar.gz
            xf86-input-synaptics-12.6.9.tar.gz
            dri2proto-2.3.tar.gz
            blktool_4.orig.tar.gz
            libid3tag-0.15.1b.tar.gz
            unzip552.tar.gz
            icu4c-3_6-src.tgz
            genext2fs_1.3.orig.tar.gz
            gst-fluendo-mp3

    As a side effect this sets self.pn_regex, self.suffixregex and
    self.name_version_type_regex, which _vercmp() and
    _check_latest_version() read.

    Returns the result of _check_latest_version() for the first listing
    that yields one; the value is false when nothing was found.
    """
    # match most patterns which uses "-" as separator to version digits
    pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([\-_][a-zA-Z]\w+)*\+?[\-_]"
    # a loose pattern such as for unzip552.tar.gz
    pn_prefix2 = r"[a-zA-Z]+"
    # a loose pattern such as for 80325-quicky-0.4.tar.gz
    pn_prefix3 = r"[0-9]+[\-]?[a-zA-Z]+"
    # Save the Package Name (pn) Regex for use later
    self.pn_regex = "(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

    # match version
    version_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

    # src.rpm extension was added only for rpm package. Can be removed if the rpm
    # packaged will always be considered as having to be manually upgraded
    suffixlist = r"(tar\.gz|tgz|tar\.bz2|zip|xz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
    self.suffixregex = re.compile(suffixlist)

    # match name, version and archive type of a package
    self.name_version_type_regex = re.compile(r"(?P<name>%s?)\.?v?(?P<ver>%s)(\-source)?[\.\-](?P<type>%s$)" % (self.pn_regex, version_regex, suffixlist))

    regex_uri = d.getVar("REGEX_URI", True)
    newpath = ud.path

    # search for version matches on folders inside the path, like:
    # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
    m = re.search(r"(?P<dirver>[^/]*(\d+\.)*\d+([\-_]r\d+)*)/", ud.path)
    bb.debug(3, "path = %s" % (ud.path))
    bb.debug(3, "Regex: %s" % (self.name_version_type_regex.pattern))
    if m and not regex_uri:
        dirver = m.group('dirver')
        # generate the new uri after removing version directory name
        newuri = bb.fetch.encodeurl([ud.type, ud.host, ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        newversion = self._check_latest_dir(newuri, dirver, ud, d)
        if newversion and dirver != newversion:
            # only the first occurrence is the version directory
            newpath = ud.path.replace(dirver, newversion, 1)

    # try to acquire all remote files in current directory
    # Split on the last "/" so that a file name which also occurs earlier
    # in the path, or an empty one (path ending in "/"), is handled.
    dirpart, _, packagename = newpath.rpartition("/") # current package name
    newpath = dirpart + "/" # path to directory

    # generate the new uri with the appropriate latest directory
    newuri = regex_uri or bb.fetch.encodeurl([ud.type, ud.host, newpath, ud.user, ud.pswd, {}])
    newversion = self._check_latest_version(newuri, packagename, ud, d)
    while not newversion:
        # maybe it's hiding in a download directory so try there
        newuri = "/".join(newuri.split("/")[0:-2]) + "/download"
        if newuri == "/download" or newuri == "http://download":
            break
        newversion = self._check_latest_version(newuri, packagename, ud, d)

    return newversion
345