author    Tom Zanussi <tom.zanussi@linux.intel.com>    2014-08-04 07:55:07 -0500
committer Richard Purdie <richard.purdie@linuxfoundation.org>    2014-08-11 10:53:10 +0100
commit    c9b5ea08734c5bb588de0505e6b762691ac9223a
tree      a09ed9bb8161ae40ecae733c32413ec73d6d23c8 /scripts
parent    ba19b60fd2bf2594e1fffeb75757775628228b8a
wic: Remove 3rdparty/urlgrabber
wic doesn't use it, so remove it.

(From OE-Core rev: 00dcdb29c89634ab267d328eb00f8eb70c696655)

Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts')
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py   |   53
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py  |  463
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py    | 1477
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py  |  617
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py     |  458
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py   |  530
-rw-r--r--  scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py |   90
7 files changed, 0 insertions, 3688 deletions
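
Editor's note: for a removal like this it is easy to confirm that nothing left in the tree still imports the package. A minimal sketch, not part of the commit; the scanned root, the find_references helper, and the search pattern are illustrative:

    import os

    def find_references(root, needle='urlgrabber'):
        # yield (path, lineno, line) for source lines mentioning the package
        for dirpath, dirnames, filenames in os.walk(root):
            for name in filenames:
                if not name.endswith('.py'):
                    continue
                path = os.path.join(dirpath, name)
                for lineno, line in enumerate(open(path), 1):
                    if needle in line:
                        yield path, lineno, line.rstrip()

    for hit in find_references('scripts/lib'):
        print '%s:%d: %s' % hit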
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
deleted file mode 100644
index 7bcd9d5541..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
+++ /dev/null
@@ -1,53 +0,0 @@
1# This program is free software; you can redistribute it and/or modify
2# it under the terms of the GNU General Public License as published by
3# the Free Software Foundation; either version 2 of the License, or
4# (at your option) any later version.
5#
6# This program is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9# GNU Library General Public License for more details.
10#
11# You should have received a copy of the GNU General Public License
12# along with this program; if not, write to the Free Software
13# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
14
15# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
16
17# $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $
18
19"""A high-level cross-protocol url-grabber.
20
21Using urlgrabber, data can be fetched in three basic ways:
22
23 urlgrab(url) copy the file to the local filesystem
24 urlopen(url) open the remote file and return a file object
25 (like urllib2.urlopen)
26 urlread(url) return the contents of the file as a string
27
28When using these functions (or methods), urlgrabber supports the
29following features:
30
31 * identical behavior for http://, ftp://, and file:// urls
32 * http keepalive - faster downloads of many files by using
33 only a single connection
34 * byte ranges - fetch only a portion of the file
35 * reget - for a urlgrab, resume a partial download
36 * progress meters - the ability to report download progress
37 automatically, even when using urlopen!
38 * throttling - restrict bandwidth usage
39 * retries - automatically retry a download if it fails. The
40 number of retries and failure types are configurable.
41 * authenticated server access for http and ftp
42 * proxy support - support for authenticated http and ftp proxies
43 * mirror groups - treat a list of mirrors as a single source,
44 automatically switching mirrors if there is a failure.
45"""
46
47__version__ = '3.1.0'
48__date__ = '2006/09/21'
49__author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \
50 'Ryan Tomayko <rtomayko@naeblis.cx>'
51__url__ = 'http://linux.duke.edu/projects/urlgrabber/'
52
53from grabber import urlgrab, urlopen, urlread
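
Editor's note: the re-export above was the package's public entry point. For context, a minimal usage sketch of the three helpers exactly as the docstring describes them; the URL and limit value are placeholders:

    from urlgrabber import urlgrab, urlopen, urlread

    # copy the remote file locally; returns the local filename
    local = urlgrab('http://example.com/some/file.txt')

    # open the remote file like an ordinary file object
    fo = urlopen('http://example.com/some/file.txt')
    head = fo.read(100)
    fo.close()

    # read the whole file into a string, capping memory usage
    data = urlread('http://example.com/some/file.txt', limit=1000000)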
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
deleted file mode 100644
index 001b4e32d6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
+++ /dev/null
@@ -1,463 +0,0 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $
21
22import os
23import stat
24import urllib
25import urllib2
26import rfc822
27
28DEBUG = None
29
30try:
31 from cStringIO import StringIO
32except ImportError, msg:
33 from StringIO import StringIO
34
35class RangeError(IOError):
36 """Error raised when an unsatisfiable range is requested."""
37 pass
38
39class HTTPRangeHandler(urllib2.BaseHandler):
40 """Handler that enables HTTP Range headers.
41
42 This was extremely simple. The Range header is an HTTP feature to
43 begin with, so all this class does is tell urllib2 that the
44 "206 Partial Content" response from the HTTP server is what we
45 expected.
46
47 Example:
48 import urllib2
49 import byterange
50
51 range_handler = byterange.HTTPRangeHandler()
52 opener = urllib2.build_opener(range_handler)
53
54 # install it
55 urllib2.install_opener(opener)
56
57 # create Request and set Range header
58 req = urllib2.Request('http://www.python.org/')
59 req.add_header('Range', 'bytes=30-50')
60 f = urllib2.urlopen(req)
61 """
62
63 def http_error_206(self, req, fp, code, msg, hdrs):
64 # 206 Partial Content Response
65 r = urllib.addinfourl(fp, hdrs, req.get_full_url())
66 r.code = code
67 r.msg = msg
68 return r
69
70 def http_error_416(self, req, fp, code, msg, hdrs):
71 # HTTP's Range Not Satisfiable error
72 raise RangeError('Requested Range Not Satisfiable')
73
74class HTTPSRangeHandler(HTTPRangeHandler):
75 """ Range Header support for HTTPS. """
76
77 def https_error_206(self, req, fp, code, msg, hdrs):
78 return self.http_error_206(req, fp, code, msg, hdrs)
79
80 def https_error_416(self, req, fp, code, msg, hdrs):
81 return self.http_error_416(req, fp, code, msg, hdrs)
82
83class RangeableFileObject:
84 """File object wrapper to enable raw range handling.
85 This was implemented primarily for handling range
86 specifications for file:// urls. This object effectively makes
87 a file object look like it consists only of a range of bytes in
88 the stream.
89
90 Examples:
91 # expose 10 bytes, starting at byte position 20, from
92 # /etc/passwd.
93 >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
94 # seek seeks within the range (to position 23 in this case)
95 >>> fo.seek(3)
96 # tell tells where you're at _within the range_ (position 3 in
97 # this case)
98 >>> fo.tell()
99 # read EOFs if an attempt is made to read past the last
100 # byte in the range. the following will return only 7 bytes.
101 >>> fo.read(30)
102 """
103
104 def __init__(self, fo, rangetup):
105 """Create a RangeableFileObject.
106 fo -- a file like object. only the read() method need be
107 supported but supporting an optimized seek() is
108 preferable.
109 rangetup -- a (firstbyte,lastbyte) tuple specifying the range
110 to work over.
111 The file object provided is assumed to be at byte offset 0.
112 """
113 self.fo = fo
114 (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
115 self.realpos = 0
116 self._do_seek(self.firstbyte)
117
118 def __getattr__(self, name):
119 """This effectively allows us to wrap at the instance level.
120 Any attribute not found in _this_ object will be searched for
121 in self.fo. This includes methods."""
122 if hasattr(self.fo, name):
123 return getattr(self.fo, name)
124 raise AttributeError, name
125
126 def tell(self):
127 """Return the position within the range.
128 This is different from fo.seek in that position 0 is the
129 first byte position of the range tuple. For example, if
130 this object was created with a range tuple of (500,899),
131 tell() will return 0 when at byte position 500 of the file.
132 """
133 return (self.realpos - self.firstbyte)
134
135 def seek(self,offset,whence=0):
136 """Seek within the byte range.
137 Positioning is identical to that described under tell().
138 """
139 assert whence in (0, 1, 2)
140 if whence == 0: # absolute seek
141 realoffset = self.firstbyte + offset
142 elif whence == 1: # relative seek
143 realoffset = self.realpos + offset
144 elif whence == 2: # absolute from end of file
145 # XXX: are we raising the right Error here?
146 raise IOError('seek from end of file not supported.')
147
148 # do not allow seek past lastbyte in range
149 if self.lastbyte and (realoffset >= self.lastbyte):
150 realoffset = self.lastbyte
151
152 self._do_seek(realoffset - self.realpos)
153
154 def read(self, size=-1):
155 """Read within the range.
156 This method will limit the size read based on the range.
157 """
158 size = self._calc_read_size(size)
159 rslt = self.fo.read(size)
160 self.realpos += len(rslt)
161 return rslt
162
163 def readline(self, size=-1):
164 """Read lines within the range.
165 This method will limit the size read based on the range.
166 """
167 size = self._calc_read_size(size)
168 rslt = self.fo.readline(size)
169 self.realpos += len(rslt)
170 return rslt
171
172 def _calc_read_size(self, size):
173 """Handles calculating the amount of data to read based on
174 the range.
175 """
176 if self.lastbyte:
177 if size > -1:
178 if ((self.realpos + size) >= self.lastbyte):
179 size = (self.lastbyte - self.realpos)
180 else:
181 size = (self.lastbyte - self.realpos)
182 return size
183
184 def _do_seek(self,offset):
185 """Seek based on whether wrapped object supports seek().
186 offset is relative to the current position (self.realpos).
187 """
188 assert offset >= 0
189 if not hasattr(self.fo, 'seek'):
190 self._poor_mans_seek(offset)
191 else:
192 self.fo.seek(self.realpos + offset)
193 self.realpos+= offset
194
195 def _poor_mans_seek(self,offset):
196 """Seek by calling the wrapped file objects read() method.
197 This is used for file like objects that do not have native
198 seek support. The wrapped objects read() method is called
199 to manually seek to the desired position.
200 offset -- read this number of bytes from the wrapped
201 file object.
202 raise RangeError if we encounter EOF before reaching the
203 specified offset.
204 """
205 pos = 0
206 bufsize = 1024
207 while pos < offset:
208 if (pos + bufsize) > offset:
209 bufsize = offset - pos
210 buf = self.fo.read(bufsize)
211 if len(buf) != bufsize:
212 raise RangeError('Requested Range Not Satisfiable')
213 pos+= bufsize
214
215class FileRangeHandler(urllib2.FileHandler):
216 """FileHandler subclass that adds Range support.
217 This class handles Range headers exactly like an HTTP
218 server would.
219 """
220 def open_local_file(self, req):
221 import mimetypes
222 import mimetools
223 host = req.get_host()
224 file = req.get_selector()
225 localfile = urllib.url2pathname(file)
226 stats = os.stat(localfile)
227 size = stats[stat.ST_SIZE]
228 modified = rfc822.formatdate(stats[stat.ST_MTIME])
229 mtype = mimetypes.guess_type(file)[0]
230 if host:
231 host, port = urllib.splitport(host)
232 if port or socket.gethostbyname(host) not in self.get_names():
233 raise urllib2.URLError('file not on local host')
234 fo = open(localfile,'rb')
235 brange = req.headers.get('Range',None)
236 brange = range_header_to_tuple(brange)
237 assert brange != ()
238 if brange:
239 (fb,lb) = brange
240 if lb == '': lb = size
241 if fb < 0 or fb > size or lb > size:
242 raise RangeError('Requested Range Not Satisfiable')
243 size = (lb - fb)
244 fo = RangeableFileObject(fo, (fb,lb))
245 headers = mimetools.Message(StringIO(
246 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
247 (mtype or 'text/plain', size, modified)))
248 return urllib.addinfourl(fo, headers, 'file:'+file)
249
250
251# FTP Range Support
252# Unfortunately, a large amount of base FTP code had to be copied
253# from urllib and urllib2 in order to insert the FTP REST command.
254# Code modifications for range support have been commented as
255# follows:
256# -- range support modifications start/end here
257
258from urllib import splitport, splituser, splitpasswd, splitattr, \
259 unquote, addclosehook, addinfourl
260import ftplib
261import socket
262import sys
263import ftplib
264import mimetypes
265import mimetools
266
267class FTPRangeHandler(urllib2.FTPHandler):
268 def ftp_open(self, req):
269 host = req.get_host()
270 if not host:
271 raise IOError, ('ftp error', 'no host given')
272 host, port = splitport(host)
273 if port is None:
274 port = ftplib.FTP_PORT
275
276 # username/password handling
277 user, host = splituser(host)
278 if user:
279 user, passwd = splitpasswd(user)
280 else:
281 passwd = None
282 host = unquote(host)
283 user = unquote(user or '')
284 passwd = unquote(passwd or '')
285
286 try:
287 host = socket.gethostbyname(host)
288 except socket.error, msg:
289 raise urllib2.URLError(msg)
290 path, attrs = splitattr(req.get_selector())
291 dirs = path.split('/')
292 dirs = map(unquote, dirs)
293 dirs, file = dirs[:-1], dirs[-1]
294 if dirs and not dirs[0]:
295 dirs = dirs[1:]
296 try:
297 fw = self.connect_ftp(user, passwd, host, port, dirs)
298 type = file and 'I' or 'D'
299 for attr in attrs:
300 attr, value = splitattr(attr)
301 if attr.lower() == 'type' and \
302 value in ('a', 'A', 'i', 'I', 'd', 'D'):
303 type = value.upper()
304
305 # -- range support modifications start here
306 rest = None
307 range_tup = range_header_to_tuple(req.headers.get('Range',None))
308 assert range_tup != ()
309 if range_tup:
310 (fb,lb) = range_tup
311 if fb > 0: rest = fb
312 # -- range support modifications end here
313
314 fp, retrlen = fw.retrfile(file, type, rest)
315
316 # -- range support modifications start here
317 if range_tup:
318 (fb,lb) = range_tup
319 if lb == '':
320 if retrlen is None or retrlen == 0:
321 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
322 lb = retrlen
323 retrlen = lb - fb
324 if retrlen < 0:
325 # beginning of range is larger than file
326 raise RangeError('Requested Range Not Satisfiable')
327 else:
328 retrlen = lb - fb
329 fp = RangeableFileObject(fp, (0,retrlen))
330 # -- range support modifications end here
331
332 headers = ""
333 mtype = mimetypes.guess_type(req.get_full_url())[0]
334 if mtype:
335 headers += "Content-Type: %s\n" % mtype
336 if retrlen is not None and retrlen >= 0:
337 headers += "Content-Length: %d\n" % retrlen
338 sf = StringIO(headers)
339 headers = mimetools.Message(sf)
340 return addinfourl(fp, headers, req.get_full_url())
341 except ftplib.all_errors, msg:
342 raise IOError, ('ftp error', msg), sys.exc_info()[2]
343
344 def connect_ftp(self, user, passwd, host, port, dirs):
345 fw = ftpwrapper(user, passwd, host, port, dirs)
346 return fw
347
348class ftpwrapper(urllib.ftpwrapper):
349 # range support note:
350 # this ftpwrapper code is copied directly from
351 # urllib. The only enhancement is to add the rest
352 # argument and pass it on to ftp.ntransfercmd
353 def retrfile(self, file, type, rest=None):
354 self.endtransfer()
355 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
356 else: cmd = 'TYPE ' + type; isdir = 0
357 try:
358 self.ftp.voidcmd(cmd)
359 except ftplib.all_errors:
360 self.init()
361 self.ftp.voidcmd(cmd)
362 conn = None
363 if file and not isdir:
364 # Use nlst to see if the file exists at all
365 try:
366 self.ftp.nlst(file)
367 except ftplib.error_perm, reason:
368 raise IOError, ('ftp error', reason), sys.exc_info()[2]
369 # Restore the transfer mode!
370 self.ftp.voidcmd(cmd)
371 # Try to retrieve as a file
372 try:
373 cmd = 'RETR ' + file
374 conn = self.ftp.ntransfercmd(cmd, rest)
375 except ftplib.error_perm, reason:
376 if str(reason)[:3] == '501':
377 # workaround for REST not supported error
378 fp, retrlen = self.retrfile(file, type)
379 fp = RangeableFileObject(fp, (rest,''))
380 return (fp, retrlen)
381 elif str(reason)[:3] != '550':
382 raise IOError, ('ftp error', reason), sys.exc_info()[2]
383 if not conn:
384 # Set transfer mode to ASCII!
385 self.ftp.voidcmd('TYPE A')
386 # Try a directory listing
387 if file: cmd = 'LIST ' + file
388 else: cmd = 'LIST'
389 conn = self.ftp.ntransfercmd(cmd)
390 self.busy = 1
391 # Pass back both a suitably decorated object and a retrieval length
392 return (addclosehook(conn[0].makefile('rb'),
393 self.endtransfer), conn[1])
394
395
396####################################################################
397# Range Tuple Functions
398# XXX: These range tuple functions might go better in a class.
399
400_rangere = None
401def range_header_to_tuple(range_header):
402 """Get a (firstbyte,lastbyte) tuple from a Range header value.
403
404 Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
405 function pulls the firstbyte and lastbyte values and returns
406 a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
407 the header value, it is returned as an empty string in the
408 tuple.
409
410 Return None if range_header is None
411 Return () if range_header does not conform to the range spec
412 pattern.
413
414 """
415 global _rangere
416 if range_header is None: return None
417 if _rangere is None:
418 import re
419 _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
420 match = _rangere.match(range_header)
421 if match:
422 tup = range_tuple_normalize(match.group(1,2))
423 if tup and tup[1]:
424 tup = (tup[0],tup[1]+1)
425 return tup
426 return ()
427
428def range_tuple_to_header(range_tup):
429 """Convert a range tuple to a Range header value.
430 Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
431 if no range is needed.
432 """
433 if range_tup is None: return None
434 range_tup = range_tuple_normalize(range_tup)
435 if range_tup:
436 if range_tup[1]:
437 range_tup = (range_tup[0],range_tup[1] - 1)
438 return 'bytes=%s-%s' % range_tup
439
440def range_tuple_normalize(range_tup):
441 """Normalize a (first_byte,last_byte) range tuple.
442 Return a tuple whose first element is guaranteed to be an int
443 and whose second element will be '' (meaning: the last byte) or
444 an int. Finally, return None if the normalized tuple == (0,'')
445 as that is equivalent to retrieving the entire file.
446 """
447 if range_tup is None: return None
448 # handle first byte
449 fb = range_tup[0]
450 if fb in (None,''): fb = 0
451 else: fb = int(fb)
452 # handle last byte
453 try: lb = range_tup[1]
454 except IndexError: lb = ''
455 else:
456 if lb is None: lb = ''
457 elif lb != '': lb = int(lb)
458 # check if range is over the entire file
459 if (fb,lb) == (0,''): return None
460 # check that the range is valid
461 if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
462 return (fb,lb)
463
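
Editor's note: for context, a minimal sketch of how the handlers and tuple helpers above were meant to be used (Python 2, as in the deleted code; the URL is a placeholder):

    import urllib2
    from urlgrabber.byterange import HTTPRangeHandler, \
        range_header_to_tuple, range_tuple_to_header

    # teach urllib2 that "206 Partial Content" is an expected response
    urllib2.install_opener(urllib2.build_opener(HTTPRangeHandler()))

    req = urllib2.Request('http://example.com/big.bin')
    req.add_header('Range', 'bytes=0-1023')        # first 1 KiB
    first_kb = urllib2.urlopen(req).read()

    # the helpers convert between header strings and python-style tuples:
    # the header is inclusive, the tuple is half-open
    assert range_header_to_tuple('bytes=30-50') == (30, 51)
    assert range_tuple_to_header((30, 51)) == 'bytes=30-50'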
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
deleted file mode 100644
index fefdab36f6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
+++ /dev/null
@@ -1,1477 +0,0 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20"""A high-level cross-protocol url-grabber.
21
22GENERAL ARGUMENTS (kwargs)
23
24 Where possible, the module-level default is indicated, and legal
25 values are provided.
26
27 copy_local = 0 [0|1]
28
29 ignored except for file:// urls, in which case it specifies
30 whether urlgrab should still make a copy of the file, or simply
31 point to the existing copy. The module level default for this
32 option is 0.
33
34 close_connection = 0 [0|1]
35
36 tells URLGrabber to close the connection after a file has been
37 transfered. This is ignored unless the download happens with the
38 http keepalive handler (keepalive=1). Otherwise, the connection
39 is left open for further use. The module level default for this
40 option is 0 (keepalive connections will not be closed).
41
42 keepalive = 1 [0|1]
43
44 specifies whether keepalive should be used for HTTP/1.1 servers
45 that support it. The module level default for this option is 1
46 (keepalive is enabled).
47
48 progress_obj = None
49
50 a class instance that supports the following methods:
51 po.start(filename, url, basename, length, text)
52 # length will be None if unknown
53 po.update(read) # read == bytes read so far
54 po.end()
55
56 text = None
57
58 specifies an alternative text item at the beginning of the progress
59 bar line. If not given, the basename of the file is used.
60
61 throttle = 1.0
62
63 a number - if it's an int, it's the bytes/second throttle limit.
64 If it's a float, it is first multiplied by bandwidth. If throttle
65 == 0, throttling is disabled. If None, the module-level default
66 (which can be set on default_grabber.throttle) is used. See
67 BANDWIDTH THROTTLING for more information.
68
69 timeout = None
70
71 a positive float expressing the number of seconds to wait for socket
72 operations. If the value is None or 0.0, socket operations will block
73 forever. Setting this option causes urlgrabber to call the settimeout
74 method on the Socket object used for the request. See the Python
75 documentation on settimeout for more information.
76 http://www.python.org/doc/current/lib/socket-objects.html
77
78 bandwidth = 0
79
80 the nominal max bandwidth in bytes/second. If throttle is a float
81 and bandwidth == 0, throttling is disabled. If None, the
82 module-level default (which can be set on
83 default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for
84 more information.
85
86 range = None
87
88 a tuple of the form (first_byte, last_byte) describing a byte
89 range to retrieve. Either or both of the values may set to
90 None. If first_byte is None, byte offset 0 is assumed. If
91 last_byte is None, the last byte available is assumed. Note that
92 the range specification is python-like in that (0,10) will yield
93 the first 10 bytes of the file.
94
95 If set to None, no range will be used.
96
97 reget = None [None|'simple'|'check_timestamp']
98
99 whether to attempt to reget a partially-downloaded file. Reget
100 only applies to .urlgrab and (obviously) only if there is a
101 partially downloaded file. Reget has two modes:
102
103 'simple' -- the local file will always be trusted. If there
104 are 100 bytes in the local file, then the download will always
105 begin 100 bytes into the requested file.
106
107 'check_timestamp' -- the timestamp of the server file will be
108 compared to the timestamp of the local file. ONLY if the
109 local file is newer than or the same age as the server file
110 will reget be used. If the server file is newer, or the
111 timestamp is not returned, the entire file will be fetched.
112
113 NOTE: urlgrabber can do very little to verify that the partial
114 file on disk is identical to the beginning of the remote file.
115 You may want to either employ a custom "checkfunc" or simply avoid
116 using reget in situations where corruption is a concern.
117
118 user_agent = 'urlgrabber/VERSION'
119
120 a string, usually of the form 'AGENT/VERSION' that is provided to
121 HTTP servers in the User-agent header. The module level default
122 for this option is "urlgrabber/VERSION".
123
124 http_headers = None
125
126 a tuple of 2-tuples, each containing a header and value. These
127 will be used for http and https requests only. For example, you
128 can do
129 http_headers = (('Pragma', 'no-cache'),)
130
131 ftp_headers = None
132
133 this is just like http_headers, but will be used for ftp requests.
134
135 proxies = None
136
137 a dictionary that maps protocol schemes to proxy hosts. For
138 example, to use a proxy server on host "foo" port 3128 for http
139 and https URLs:
140 proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' }
141 note that proxy authentication information may be provided using
142 normal URL constructs:
143 proxies={ 'http' : 'http://user:password@foo:3128' }
144 Lastly, if proxies is None, the default environment settings will
145 be used.
146
147 prefix = None
148
149 a url prefix that will be prepended to all requested urls. For
150 example:
151 g = URLGrabber(prefix='http://foo.com/mirror/')
152 g.urlgrab('some/file.txt')
153 ## this will fetch 'http://foo.com/mirror/some/file.txt'
154 This option exists primarily to allow identical behavior to
155 MirrorGroup (and derived) instances. Note: a '/' will be inserted
156 if necessary, so you cannot specify a prefix that ends with a
157 partial file or directory name.
158
159 opener = None
160
161 Overrides the default urllib2.OpenerDirector provided to urllib2
162 when making requests. This option exists so that the urllib2
163 handler chain may be customized. Note that the range, reget,
164 proxy, and keepalive features require that custom handlers be
165 provided to urllib2 in order to function properly. If an opener
166 option is provided, no attempt is made by urlgrabber to ensure
167 chain integrity. You are responsible for ensuring that any
168 extension handlers are present if said features are required.
169
170 data = None
171
172 Only relevant for the HTTP family (and ignored for other
173 protocols), this allows HTTP POSTs. When the data kwarg is
174 present (and not None), an HTTP request will automatically become
175 a POST rather than GET. This is done by direct passthrough to
176 urllib2. If you use this, you may also want to set the
177 'Content-length' and 'Content-type' headers with the http_headers
178 option. Note that python 2.2 handles the case of these
179 headers badly, and if you do not use the proper case (shown here),
180 your values will be overridden with the defaults.
181
182
183RETRY RELATED ARGUMENTS
184
185 retry = None
186
187 the number of times to retry the grab before bailing. If this is
188 zero, it will retry forever. This was intentional... really, it
189 was :). If this value is not supplied, or is supplied but is None,
190 retrying does not occur.
191
192 retrycodes = [-1,2,4,5,6,7]
193
194 a sequence of errorcodes (values of e.errno) for which it should
195 retry. See the doc on URLGrabError for more details on this. You
196 might consider modifying a copy of the default codes rather than
197 building yours from scratch so that if the list is extended in the
198 future (or one code is split into two) you can still enjoy the
199 benefits of the default list. You can do that with something like
200 this:
201
202 retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
203 if 12 not in retrycodes:
204 retrycodes.append(12)
205
206 checkfunc = None
207
208 a function to do additional checks. This defaults to None, which
209 means no additional checking. The function should simply return
210 on a successful check. It should raise URLGrabError on an
211 unsuccessful check. Raising of any other exception will be
212 considered immediate failure and no retries will occur.
213
214 If it raises URLGrabError, the error code will determine the retry
215 behavior. Negative error numbers are reserved for use by these
216 passed in functions, so you can use many negative numbers for
217 different types of failure. By default, -1 results in a retry,
218 but this can be customized with retrycodes.
219
220 If you simply pass in a function, it will be given exactly one
221 argument: a CallbackObject instance with the .url attribute
222 defined and either .filename (for urlgrab) or .data (for urlread).
223 For urlgrab, .filename is the name of the local file. For
224 urlread, .data is the actual string data. If you need other
225 arguments passed to the callback (program state of some sort), you
226 can do so like this:
227
228 checkfunc=(function, ('arg1', 2), {'kwarg': 3})
229
230 if the downloaded file has filename /tmp/stuff, then this will
231 result in this call (for urlgrab):
232
233 function(obj, 'arg1', 2, kwarg=3)
234 # obj.filename = '/tmp/stuff'
235 # obj.url = 'http://foo.com/stuff'
236
237 NOTE: both the "args" tuple and "kwargs" dict must be present if
238 you use this syntax, but either (or both) can be empty.
239
240 failure_callback = None
241
242 The callback that gets called during retries when an attempt to
243 fetch a file fails. The syntax for specifying the callback is
244 identical to checkfunc, except for the attributes defined in the
245 CallbackObject instance. The attributes for failure_callback are:
246
247 exception = the raised exception
248 url = the url we're trying to fetch
249 tries = the number of tries so far (including this one)
250 retry = the value of the retry option
251
252 The callback is present primarily to inform the calling program of
253 the failure, but if it raises an exception (including the one it's
254 passed) that exception will NOT be caught and will therefore cause
255 future retries to be aborted.
256
257 The callback is called for EVERY failure, including the last one.
258 On the last try, the callback can raise an alternate exception,
259 but it cannot (without severe trickiness) prevent the exception
260 from being raised.
261
262 interrupt_callback = None
263
264 This callback is called if KeyboardInterrupt is received at any
265 point in the transfer. Basically, this callback can have three
266 impacts on the fetch process based on the way it exits:
267
268 1) raise no exception: the current fetch will be aborted, but
269 any further retries will still take place
270
271 2) raise a URLGrabError: if you're using a MirrorGroup, then
272 this will prompt a failover to the next mirror according to
273 the behavior of the MirrorGroup subclass. It is recommended
274 that you raise URLGrabError with code 15, 'user abort'. If
275 you are NOT using a MirrorGroup subclass, then this is the
276 same as (3).
277
278 3) raise some other exception (such as KeyboardInterrupt), which
279 will not be caught at either the grabber or mirror levels.
280 That is, it will be raised up all the way to the caller.
281
282 This callback is very similar to failure_callback. They are
283 passed the same arguments, so you could use the same function for
284 both.
285
286 urlparser = URLParser()
287
288 The URLParser class handles pre-processing of URLs, including
289 auth-handling for user/pass encoded in http urls, file handing
290 (that is, filenames not sent as a URL), and URL quoting. If you
291 want to override any of this behavior, you can pass in a
292 replacement instance. See also the 'quote' option.
293
294 quote = None
295
296 Whether or not to quote the path portion of a url.
297 quote = 1 -> quote the URLs (they're not quoted yet)
298 quote = 0 -> do not quote them (they're already quoted)
299 quote = None -> guess what to do
300
301 This option only affects proper urls like 'file:///etc/passwd'; it
302 does not affect 'raw' filenames like '/etc/passwd'. The latter
303 will always be quoted as they are converted to URLs. Also, only
304 the path part of a url is quoted. If you need more fine-grained
305 control, you should probably subclass URLParser and pass it in via
306 the 'urlparser' option.
307
308BANDWIDTH THROTTLING
309
310 urlgrabber supports throttling via two values: throttle and
311 bandwidth. Between the two, you can either specify an absolute
312 throttle threshold or specify a threshold as a fraction of the
313 maximum available bandwidth.
314
315 throttle is a number - if it's an int, it's the bytes/second
316 throttle limit. If it's a float, it is first multiplied by
317 bandwidth. If throttle == 0, throttling is disabled. If None, the
318 module-level default (which can be set with set_throttle) is used.
319
320 bandwidth is the nominal max bandwidth in bytes/second. If throttle
321 is a float and bandwidth == 0, throttling is disabled. If None, the
322 module-level default (which can be set with set_bandwidth) is used.
323
324 THROTTLING EXAMPLES:
325
326 Lets say you have a 100 Mbps connection. This is (about) 10^8 bits
327 per second, or 12,500,000 Bytes per second. You have a number of
328 throttling options:
329
330 *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float
331
332 This will limit urlgrab to use half of your available bandwidth.
333
334 *) set_throttle(6250000) # throttle is an int
335
336 This will also limit urlgrab to use half of your available
337 bandwidth, regardless of what bandwidth is set to.
338
339 *) set_bandwidth(6250000); set_throttle(1.0) # float
340
341 Use half your bandwidth
342
343 *) set_bandwidth(6250000); set_throttle(2.0) # float
344
345 Use up to 12,500,000 Bytes per second (your nominal max bandwidth)
346
347 *) set_bandwidth(6250000); set_throttle(0) # throttle = 0
348
349 Disable throttling - this is more efficient than a very large
350 throttle setting.
351
352 *) set_bandwidth(0); set_throttle(1.0) # throttle is float, bandwidth = 0
353
354 Disable throttling - this is the default when the module is loaded.
355
356 SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING)
357
358 While this is flexible, it's not extremely obvious to the user. I
359 suggest you implement a float throttle as a percent to make the
360 distinction between absolute and relative throttling very explicit.
361
362 Also, you may want to convert the units to something more convenient
363 than bytes/second, such as kbps or kB/s, etc.
364
365"""
366
367# $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $
368
369import os
370import os.path
371import sys
372import urlparse
373import rfc822
374import time
375import string
376import urllib
377import urllib2
378from stat import * # S_* and ST_*
379
380########################################################################
381# MODULE INITIALIZATION
382########################################################################
383try:
384 exec('from ' + (__name__.split('.'))[0] + ' import __version__')
385except:
386 __version__ = '???'
387
388import sslfactory
389
390auth_handler = urllib2.HTTPBasicAuthHandler( \
391 urllib2.HTTPPasswordMgrWithDefaultRealm())
392
393try:
394 from i18n import _
395except ImportError, msg:
396 def _(st): return st
397
398try:
399 from httplib import HTTPException
400except ImportError, msg:
401 HTTPException = None
402
403try:
404 # This is a convenient way to make keepalive optional.
405 # Just rename the module so it can't be imported.
406 import keepalive
407 from keepalive import HTTPHandler, HTTPSHandler
408 have_keepalive = True
409except ImportError, msg:
410 have_keepalive = False
411
412try:
413 # add in range support conditionally too
414 import byterange
415 from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
416 FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
417 range_tuple_to_header, RangeError
418except ImportError, msg:
419 range_handlers = ()
420 RangeError = None
421 have_range = 0
422else:
423 range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
424 FileRangeHandler(), FTPRangeHandler())
425 have_range = 1
426
427
428# check whether socket timeout support is available (Python >= 2.3)
429import socket
430try:
431 TimeoutError = socket.timeout
432 have_socket_timeout = True
433except AttributeError:
434 TimeoutError = None
435 have_socket_timeout = False
436
437########################################################################
438# functions for debugging output. These functions are here because they
439# are also part of the module initialization.
440DEBUG = None
441def set_logger(DBOBJ):
442 """Set the DEBUG object. This is called by _init_default_logger when
443 the environment variable URLGRABBER_DEBUG is set, but can also be
444 called by a calling program. Basically, if the calling program uses
445 the logging module and would like to incorporate urlgrabber logging,
446 then it can do so this way. It's probably not necessary as most
447 internal logging is only for debugging purposes.
448
449 The passed-in object should be a logging.Logger instance. It will
450 be pushed into the keepalive and byterange modules if they're
451 being used. The mirror module pulls this object in on import, so
452 you will need to manually push into it. In fact, you may find it
453 tidier to simply push your logging object (or objects) into each
454 of these modules independently.
455 """
456
457 global DEBUG
458 DEBUG = DBOBJ
459 if have_keepalive and keepalive.DEBUG is None:
460 keepalive.DEBUG = DBOBJ
461 if have_range and byterange.DEBUG is None:
462 byterange.DEBUG = DBOBJ
463 if sslfactory.DEBUG is None:
464 sslfactory.DEBUG = DBOBJ
465
466def _init_default_logger():
467 '''Examines the environment variable URLGRABBER_DEBUG and creates
468 a logging object (logging.logger) based on the contents. It takes
469 the form
470
471 URLGRABBER_DEBUG=level,filename
472
473 where "level" can be either an integer or a log level from the
474 logging module (DEBUG, INFO, etc). If the integer is zero or
475 less, logging will be disabled. Filename is the filename where
476 logs will be sent. If it is "-", then stdout will be used. If
477 the filename is empty or missing, stderr will be used. If the
478 variable cannot be processed or the logging module cannot be
479 imported (python < 2.3) then logging will be disabled. Here are
480 some examples:
481
482 URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt
483 URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout
484 URLGRABBER_DEBUG=INFO # log info and higher to stderr
485
486 This function is called during module initialization. It is not
487 intended to be called from outside. The only reason it is a
488 function at all is to keep the module-level namespace tidy and to
489 collect the code into a nice block.'''
490
491 try:
492 dbinfo = os.environ['URLGRABBER_DEBUG'].split(',')
493 import logging
494 level = logging._levelNames.get(dbinfo[0], int(dbinfo[0]))
495 if level < 1: raise ValueError()
496
497 formatter = logging.Formatter('%(asctime)s %(message)s')
498 if len(dbinfo) > 1: filename = dbinfo[1]
499 else: filename = ''
500 if filename == '': handler = logging.StreamHandler(sys.stderr)
501 elif filename == '-': handler = logging.StreamHandler(sys.stdout)
502 else: handler = logging.FileHandler(filename)
503 handler.setFormatter(formatter)
504 DBOBJ = logging.getLogger('urlgrabber')
505 DBOBJ.addHandler(handler)
506 DBOBJ.setLevel(level)
507 except (KeyError, ImportError, ValueError):
508 DBOBJ = None
509 set_logger(DBOBJ)
510
511_init_default_logger()
512########################################################################
513# END MODULE INITIALIZATION
514########################################################################
515
516
517
518class URLGrabError(IOError):
519 """
520 URLGrabError error codes:
521
522 URLGrabber error codes (0 -- 255)
523 0 - everything looks good (you should never see this)
524 1 - malformed url
525 2 - local file doesn't exist
526 3 - request for non-file local file (dir, etc)
527 4 - IOError on fetch
528 5 - OSError on fetch
529 6 - no content length header when we expected one
530 7 - HTTPException
531 8 - Exceeded read limit (for urlread)
532 9 - Requested byte range not satisfiable.
533 10 - Byte range requested, but range support unavailable
534 11 - Illegal reget mode
535 12 - Socket timeout
536 13 - malformed proxy url
537 14 - HTTPError (includes .code and .exception attributes)
538 15 - user abort
539
540 MirrorGroup error codes (256 -- 511)
541 256 - No more mirrors left to try
542
543 Custom (non-builtin) classes derived from MirrorGroup (512 -- 767)
544 [ this range reserved for application-specific error codes ]
545
546 Retry codes (< 0)
547 -1 - retry the download, unknown reason
548
549 Note: to test which group a code is in, you can simply do integer
550 division by 256: e.errno / 256
551
552 Negative codes are reserved for use by functions passed in to
553 retrygrab with checkfunc. The value -1 is built in as a generic
554 retry code and is already included in the retrycodes list.
555 Therefore, you can create a custom check function that simply
556 returns -1 and the fetch will be re-tried. For more customized
557 retries, you can use other negative number and include them in
558 retry-codes. This is nice for outputting useful messages about
559 what failed.
560
561 You can use these error codes like so:
562 try: urlgrab(url)
563 except URLGrabError, e:
564 if e.errno == 3: ...
565 # or
566 print e.strerror
567 # or simply
568 print e #### print '[Errno %i] %s' % (e.errno, e.strerror)
569 """
570 pass
571
572class CallbackObject:
573 """Container for returned callback data.
574
575 This is currently a dummy class into which urlgrabber can stuff
576 information for passing to callbacks. This way, the prototype for
577 all callbacks is the same, regardless of the data that will be
578 passed back. Any function that accepts a callback function as an
579 argument SHOULD document what it will define in this object.
580
581 It is possible that this class will have some greater
582 functionality in the future.
583 """
584 def __init__(self, **kwargs):
585 self.__dict__.update(kwargs)
586
587def urlgrab(url, filename=None, **kwargs):
588 """grab the file at <url> and make a local copy at <filename>
589 If filename is None, the basename of the url is used.
590 urlgrab returns the filename of the local file, which may be different
591 from the passed-in filename if the copy_local kwarg == 0.
592
593 See module documentation for a description of possible kwargs.
594 """
595 return default_grabber.urlgrab(url, filename, **kwargs)
596
597def urlopen(url, **kwargs):
598 """open the url and return a file object
599 If a progress object or throttle specifications exist, then
600 a special file object will be returned that supports them.
601 The file object can be treated like any other file object.
602
603 See module documentation for a description of possible kwargs.
604 """
605 return default_grabber.urlopen(url, **kwargs)
606
607def urlread(url, limit=None, **kwargs):
608 """read the url into a string, up to 'limit' bytes
609 If the limit is exceeded, an exception will be thrown. Note that urlread
610 is NOT intended to be used as a way of saying "I want the first N bytes"
611 but rather 'read the whole file into memory, but don't use too much'
612
613 See module documentation for a description of possible kwargs.
614 """
615 return default_grabber.urlread(url, limit, **kwargs)
616
617
618class URLParser:
619 """Process the URLs before passing them to urllib2.
620
621 This class does several things:
622
623 * add any prefix
624 * translate a "raw" file to a proper file: url
625 * handle any http or https auth that's encoded within the url
626 * quote the url
627
628 Only the "parse" method is called directly, and it calls sub-methods.
629
630 An instance of this class is held in the options object, which
631 means that it's easy to change the behavior by sub-classing and
632 passing the replacement in. It need only have a method like:
633
634 url, parts = urlparser.parse(url, opts)
635 """
636
637 def parse(self, url, opts):
638 """parse the url and return the (modified) url and its parts
639
640 Note: a raw file WILL be quoted when it's converted to a URL.
641 However, other urls (ones which come with a proper scheme) may
642 or may not be quoted according to opts.quote
643
644 opts.quote = 1 --> quote it
645 opts.quote = 0 --> do not quote it
646 opts.quote = None --> guess
647 """
648 quote = opts.quote
649
650 if opts.prefix:
651 url = self.add_prefix(url, opts.prefix)
652
653 parts = urlparse.urlparse(url)
654 (scheme, host, path, parm, query, frag) = parts
655
656 if not scheme or (len(scheme) == 1 and scheme in string.letters):
657 # if a scheme isn't specified, we guess that it's "file:"
658 if url[0] not in '/\\': url = os.path.abspath(url)
659 url = 'file:' + urllib.pathname2url(url)
660 parts = urlparse.urlparse(url)
661 quote = 0 # pathname2url quotes, so we won't do it again
662
663 if scheme in ['http', 'https']:
664 parts = self.process_http(parts, url)
665
666 if quote is None:
667 quote = self.guess_should_quote(parts)
668 if quote:
669 parts = self.quote(parts)
670
671 url = urlparse.urlunparse(parts)
672 return url, parts
673
674 def add_prefix(self, url, prefix):
675 if prefix[-1] == '/' or url[0] == '/':
676 url = prefix + url
677 else:
678 url = prefix + '/' + url
679 return url
680
681 def process_http(self, parts, url):
682 (scheme, host, path, parm, query, frag) = parts
683
684 if '@' in host and auth_handler:
685 try:
686 user_pass, host = host.split('@', 1)
687 # a missing ':' raises ValueError, handled below as a bad URL
688 user, password = user_pass.split(':', 1)
689 except ValueError, e:
690 raise URLGrabError(1, _('Bad URL: %s') % url)
691 if DEBUG: DEBUG.info('adding HTTP auth: %s, XXXXXXXX', user)
692 auth_handler.add_password(None, host, user, password)
693
694 return (scheme, host, path, parm, query, frag)
695
696 def quote(self, parts):
697 """quote the URL
698
699 This method quotes ONLY the path part. If you need to quote
700 other parts, you should override this and pass in your derived
701 class. The other alternative is to quote other parts before
702 passing into urlgrabber.
703 """
704 (scheme, host, path, parm, query, frag) = parts
705 path = urllib.quote(path)
706 return (scheme, host, path, parm, query, frag)
707
708 hexvals = '0123456789ABCDEF'
709 def guess_should_quote(self, parts):
710 """
711 Guess whether we should quote a path. This amounts to
712 guessing whether it's already quoted.
713
714 find ' ' -> 1
715 find '%' -> 1
716 find '%XX' -> 0
717 else -> 1
718 """
719 (scheme, host, path, parm, query, frag) = parts
720 if ' ' in path:
721 return 1
722 ind = string.find(path, '%')
723 if ind > -1:
724 while ind > -1:
725 if len(path) < ind+3:
726 return 1
727 code = path[ind+1:ind+3].upper()
728 if code[0] not in self.hexvals or \
729 code[1] not in self.hexvals:
730 return 1
731 ind = string.find(path, '%', ind+1)
732 return 0
733 return 1
734
735class URLGrabberOptions:
736 """Class to ease kwargs handling."""
737
738 def __init__(self, delegate=None, **kwargs):
739 """Initialize URLGrabberOptions object.
740 Set default values for all options and then update options specified
741 in kwargs.
742 """
743 self.delegate = delegate
744 if delegate is None:
745 self._set_defaults()
746 self._set_attributes(**kwargs)
747
748 def __getattr__(self, name):
749 if self.delegate and hasattr(self.delegate, name):
750 return getattr(self.delegate, name)
751 raise AttributeError, name
752
753 def raw_throttle(self):
754 """Calculate raw throttle value from throttle and bandwidth
755 values.
756 """
757 if self.throttle <= 0:
758 return 0
759 elif type(self.throttle) == type(0):
760 return float(self.throttle)
761 else: # throttle is a float
762 return self.bandwidth * self.throttle
763
764 def derive(self, **kwargs):
765 """Create a derived URLGrabberOptions instance.
766 This method creates a new instance and overrides the
767 options specified in kwargs.
768 """
769 return URLGrabberOptions(delegate=self, **kwargs)
770
771 def _set_attributes(self, **kwargs):
772 """Update object attributes with those provided in kwargs."""
773 self.__dict__.update(kwargs)
774 if have_range and kwargs.has_key('range'):
775 # normalize the supplied range value
776 self.range = range_tuple_normalize(self.range)
777 if self.reget not in [None, 'simple', 'check_timestamp']:
778 raise URLGrabError(11, _('Illegal reget mode: %s') \
779 % (self.reget, ))
780
781 def _set_defaults(self):
782 """Set all options to their default values.
783 When adding new options, make sure a default is
784 provided here.
785 """
786 self.progress_obj = None
787 self.throttle = 1.0
788 self.bandwidth = 0
789 self.retry = None
790 self.retrycodes = [-1,2,4,5,6,7]
791 self.checkfunc = None
792 self.copy_local = 0
793 self.close_connection = 0
794 self.range = None
795 self.user_agent = 'urlgrabber/%s' % __version__
796 self.keepalive = 1
797 self.proxies = None
798 self.reget = None
799 self.failure_callback = None
800 self.interrupt_callback = None
801 self.prefix = None
802 self.opener = None
803 self.cache_openers = True
804 self.timeout = None
805 self.text = None
806 self.http_headers = None
807 self.ftp_headers = None
808 self.data = None
809 self.urlparser = URLParser()
810 self.quote = None
811 self.ssl_ca_cert = None
812 self.ssl_context = None
813
814class URLGrabber:
815 """Provides easy opening of URLs with a variety of options.
816
817 All options are specified as kwargs. Options may be specified when
818 the class is created and may be overridden on a per request basis.
819
820 New objects inherit default values from default_grabber.
821 """
822
823 def __init__(self, **kwargs):
824 self.opts = URLGrabberOptions(**kwargs)
825
826 def _retry(self, opts, func, *args):
827 tries = 0
828 while 1:
829 # there are only two ways out of this loop. The second has
830 # several "sub-ways"
831 # 1) via the return in the "try" block
832 # 2) by some exception being raised
833 # a) an exception is raised that we don't "except"
834 # b) a callback raises ANY exception
835 # c) we're not retry-ing or have run out of retries
836 # d) the URLGrabError code is not in retrycodes
837 # beware of infinite loops :)
838 tries = tries + 1
839 exception = None
840 retrycode = None
841 callback = None
842 if DEBUG: DEBUG.info('attempt %i/%s: %s',
843 tries, opts.retry, args[0])
844 try:
845 r = apply(func, (opts,) + args, {})
846 if DEBUG: DEBUG.info('success')
847 return r
848 except URLGrabError, e:
849 exception = e
850 callback = opts.failure_callback
851 retrycode = e.errno
852 except KeyboardInterrupt, e:
853 exception = e
854 callback = opts.interrupt_callback
855
856 if DEBUG: DEBUG.info('exception: %s', exception)
857 if callback:
858 if DEBUG: DEBUG.info('calling callback: %s', callback)
859 cb_func, cb_args, cb_kwargs = self._make_callback(callback)
860 obj = CallbackObject(exception=exception, url=args[0],
861 tries=tries, retry=opts.retry)
862 cb_func(obj, *cb_args, **cb_kwargs)
863
864 if (opts.retry is None) or (tries == opts.retry):
865 if DEBUG: DEBUG.info('retries exceeded, re-raising')
866 raise
867
868 if (retrycode is not None) and (retrycode not in opts.retrycodes):
869 if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
870 retrycode, opts.retrycodes)
871 raise
872
873 def urlopen(self, url, **kwargs):
874 """open the url and return a file object
875 If a progress object or throttle value specified when this
876 object was created, then a special file object will be
877 returned that supports them. The file object can be treated
878 like any other file object.
879 """
880 opts = self.opts.derive(**kwargs)
881 (url,parts) = opts.urlparser.parse(url, opts)
882 def retryfunc(opts, url):
883 return URLGrabberFileObject(url, filename=None, opts=opts)
884 return self._retry(opts, retryfunc, url)
885
886 def urlgrab(self, url, filename=None, **kwargs):
887 """grab the file at <url> and make a local copy at <filename>
888 If filename is None, the basename of the url is used.
889 urlgrab returns the filename of the local file, which may be
890 different from the passed-in filename if copy_local == 0.
891 """
892 opts = self.opts.derive(**kwargs)
893 (url,parts) = opts.urlparser.parse(url, opts)
894 (scheme, host, path, parm, query, frag) = parts
895 if filename is None:
896 filename = os.path.basename( urllib.unquote(path) )
897 if scheme == 'file' and not opts.copy_local:
898 # just return the name of the local file - don't make a
899 # copy currently
900 path = urllib.url2pathname(path)
901 if host:
902 path = os.path.normpath('//' + host + path)
903 if not os.path.exists(path):
904 raise URLGrabError(2,
905 _('Local file does not exist: %s') % (path, ))
906 elif not os.path.isfile(path):
907 raise URLGrabError(3,
908 _('Not a normal file: %s') % (path, ))
909 elif not opts.range:
910 return path
911
912 def retryfunc(opts, url, filename):
913 fo = URLGrabberFileObject(url, filename, opts)
914 try:
915 fo._do_grab()
916 if not opts.checkfunc is None:
917 cb_func, cb_args, cb_kwargs = \
918 self._make_callback(opts.checkfunc)
919 obj = CallbackObject()
920 obj.filename = filename
921 obj.url = url
922 apply(cb_func, (obj, )+cb_args, cb_kwargs)
923 finally:
924 fo.close()
925 return filename
926
927 return self._retry(opts, retryfunc, url, filename)
928
929 def urlread(self, url, limit=None, **kwargs):
930 """read the url into a string, up to 'limit' bytes
931 If the limit is exceeded, an exception will be thrown. Note
932 that urlread is NOT intended to be used as a way of saying
933 "I want the first N bytes" but rather 'read the whole file
934 into memory, but don't use too much'
935 """
936 opts = self.opts.derive(**kwargs)
937 (url,parts) = opts.urlparser.parse(url, opts)
938 if limit is not None:
939 limit = limit + 1
940
941 def retryfunc(opts, url, limit):
942 fo = URLGrabberFileObject(url, filename=None, opts=opts)
943 s = ''
944 try:
945 # this is an unfortunate thing. Some file-like objects
946 # have a default "limit" of None, while the built-in (real)
947 # file objects have -1. They each break the other, so for
948 # now, we just force the default if necessary.
949 if limit is None: s = fo.read()
950 else: s = fo.read(limit)
951
952 if not opts.checkfunc is None:
953 cb_func, cb_args, cb_kwargs = \
954 self._make_callback(opts.checkfunc)
955 obj = CallbackObject()
956 obj.data = s
957 obj.url = url
958 apply(cb_func, (obj, )+cb_args, cb_kwargs)
959 finally:
960 fo.close()
961 return s
962
963 s = self._retry(opts, retryfunc, url, limit)
964 if limit and len(s) > limit:
965 raise URLGrabError(8,
966 _('Exceeded limit (%i): %s') % (limit, url))
967 return s
968
969 def _make_callback(self, callback_obj):
970 if callable(callback_obj):
971 return callback_obj, (), {}
972 else:
973 return callback_obj
974
975# create the default URLGrabber used by urlXXX functions.
976# NOTE: actual defaults are set in URLGrabberOptions
977default_grabber = URLGrabber()
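
Editor's note: since the module-level urlgrab/urlopen/urlread delegate to this instance, its options act as process-wide defaults. A small sketch of that pattern; the values are illustrative:

    from urlgrabber.grabber import default_grabber

    # every later urlgrab()/urlopen()/urlread() call inherits these defaults
    default_grabber.opts.retry = 3
    default_grabber.opts.user_agent = 'example-fetcher/1.0'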
978
979class URLGrabberFileObject:
980 """This is a file-object wrapper that supports progress objects
981 and throttling.
982
983 This exists to solve the following problem: lets say you want to
984 drop-in replace a normal open with urlopen. You want to use a
985 progress meter and/or throttling, but how do you do that without
986 rewriting your code? Answer: urlopen will return a wrapped file
987 object that does the progress meter and-or throttling internally.
988 """
989
990 def __init__(self, url, filename, opts):
991 self.url = url
992 self.filename = filename
993 self.opts = opts
994 self.fo = None
995 self._rbuf = ''
996 self._rbufsize = 1024*8
997 self._ttime = time.time()
998 self._tsize = 0
999 self._amount_read = 0
1000 self._opener = None
1001 self._do_open()
1002
1003 def __getattr__(self, name):
1004 """This effectively allows us to wrap at the instance level.
1005 Any attribute not found in _this_ object will be searched for
1006 in self.fo. This includes methods."""
1007 if hasattr(self.fo, name):
1008 return getattr(self.fo, name)
1009 raise AttributeError, name
1010
1011 def _get_opener(self):
1012 """Build a urllib2 OpenerDirector based on request options."""
1013 if self.opts.opener:
1014 return self.opts.opener
1015 elif self._opener is None:
1016 handlers = []
1017 need_keepalive_handler = (have_keepalive and self.opts.keepalive)
1018 need_range_handler = (range_handlers and \
1019 (self.opts.range or self.opts.reget))
1020 # if you specify a ProxyHandler when creating the opener
1021 # it _must_ come before all other handlers in the list or urllib2
1022 # chokes.
1023 if self.opts.proxies:
1024 handlers.append( CachedProxyHandler(self.opts.proxies) )
1025
1026 # -------------------------------------------------------
1027 # OK, these next few lines are a serious kludge to get
1028 # around what I think is a bug in python 2.2's
1029 # urllib2. The basic idea is that default handlers
1030 # get applied first. If you override one (like a
1031 # proxy handler), then the default gets pulled, but
1032 # the replacement goes on the end. In the case of
1033 # proxies, this means the normal handler picks it up
1034 # first and the proxy isn't used. Now, this probably
1035 # only happened with ftp or non-keepalive http, so not
1036 # many folks saw it. The simple approach to fixing it
1037 # is just to make sure you override the other
1038 # conflicting defaults as well. I would LOVE to see
1039            # these go away or be dealt with more elegantly. The
1040 # problem isn't there after 2.2. -MDS 2005/02/24
1041 if not need_keepalive_handler:
1042 handlers.append( urllib2.HTTPHandler() )
1043 if not need_range_handler:
1044 handlers.append( urllib2.FTPHandler() )
1045 # -------------------------------------------------------
1046
1047 ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert,
1048 self.opts.ssl_context)
1049
1050 if need_keepalive_handler:
1051 handlers.append(HTTPHandler())
1052 handlers.append(HTTPSHandler(ssl_factory))
1053 if need_range_handler:
1054 handlers.extend( range_handlers )
1055 handlers.append( auth_handler )
1056 if self.opts.cache_openers:
1057 self._opener = CachedOpenerDirector(ssl_factory, *handlers)
1058 else:
1059 self._opener = ssl_factory.create_opener(*handlers)
1060 # OK, I don't like to do this, but otherwise, we end up with
1061 # TWO user-agent headers.
1062 self._opener.addheaders = []
1063 return self._opener
1064
1065 def _do_open(self):
1066 opener = self._get_opener()
1067
1068 req = urllib2.Request(self.url, self.opts.data) # build request object
1069 self._add_headers(req) # add misc headers that we need
1070 self._build_range(req) # take care of reget and byterange stuff
1071
1072 fo, hdr = self._make_request(req, opener)
1073 if self.reget_time and self.opts.reget == 'check_timestamp':
1074 # do this if we have a local file with known timestamp AND
1075 # we're in check_timestamp reget mode.
1076 fetch_again = 0
1077 try:
1078 modified_tuple = hdr.getdate_tz('last-modified')
1079 modified_stamp = rfc822.mktime_tz(modified_tuple)
1080 if modified_stamp > self.reget_time: fetch_again = 1
1081 except (TypeError,):
1082 fetch_again = 1
1083
1084 if fetch_again:
1085 # the server version is newer than the (incomplete) local
1086 # version, so we should abandon the version we're getting
1087 # and fetch the whole thing again.
1088 fo.close()
1089 self.opts.reget = None
1090 del req.headers['Range']
1091 self._build_range(req)
1092 fo, hdr = self._make_request(req, opener)
1093
1094 (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)
1095 path = urllib.unquote(path)
1096 if not (self.opts.progress_obj or self.opts.raw_throttle() \
1097 or self.opts.timeout):
1098 # if we're not using the progress_obj, throttling, or timeout
1099 # we can get a performance boost by going directly to
1100 # the underlying fileobject for reads.
1101 self.read = fo.read
1102 if hasattr(fo, 'readline'):
1103 self.readline = fo.readline
1104 elif self.opts.progress_obj:
1105 try:
1106 length = int(hdr['Content-Length'])
1107 length = length + self._amount_read # Account for regets
1108 except (KeyError, ValueError, TypeError):
1109 length = None
1110
1111 self.opts.progress_obj.start(str(self.filename),
1112 urllib.unquote(self.url),
1113 os.path.basename(path),
1114 length, text=self.opts.text)
1115 self.opts.progress_obj.update(0)
1116 (self.fo, self.hdr) = (fo, hdr)
1117
1118 def _add_headers(self, req):
1119 if self.opts.user_agent:
1120 req.add_header('User-agent', self.opts.user_agent)
1121 try: req_type = req.get_type()
1122 except ValueError: req_type = None
1123 if self.opts.http_headers and req_type in ('http', 'https'):
1124 for h, v in self.opts.http_headers:
1125 req.add_header(h, v)
1126 if self.opts.ftp_headers and req_type == 'ftp':
1127 for h, v in self.opts.ftp_headers:
1128 req.add_header(h, v)
1129
1130 def _build_range(self, req):
1131 self.reget_time = None
1132 self.append = 0
1133 reget_length = 0
1134 rt = None
1135 if have_range and self.opts.reget and type(self.filename) == type(''):
1136 # we have reget turned on and we're dumping to a file
1137 try:
1138 s = os.stat(self.filename)
1139 except OSError:
1140 pass
1141 else:
1142 self.reget_time = s[ST_MTIME]
1143 reget_length = s[ST_SIZE]
1144
1145 # Set initial length when regetting
1146 self._amount_read = reget_length
1147
1148 rt = reget_length, ''
1149 self.append = 1
1150
1151 if self.opts.range:
1152 if not have_range:
1153 raise URLGrabError(10, _('Byte range requested but range '\
1154 'support unavailable'))
1155 rt = self.opts.range
1156 if rt[0]: rt = (rt[0] + reget_length, rt[1])
1157
1158 if rt:
1159 header = range_tuple_to_header(rt)
1160 if header: req.add_header('Range', header)
1161
1162 def _make_request(self, req, opener):
1163 try:
1164 if have_socket_timeout and self.opts.timeout:
1165 old_to = socket.getdefaulttimeout()
1166 socket.setdefaulttimeout(self.opts.timeout)
1167 try:
1168 fo = opener.open(req)
1169 finally:
1170 socket.setdefaulttimeout(old_to)
1171 else:
1172 fo = opener.open(req)
1173 hdr = fo.info()
1174 except ValueError, e:
1175 raise URLGrabError(1, _('Bad URL: %s') % (e, ))
1176 except RangeError, e:
1177 raise URLGrabError(9, str(e))
1178 except urllib2.HTTPError, e:
1179 new_e = URLGrabError(14, str(e))
1180 new_e.code = e.code
1181 new_e.exception = e
1182 raise new_e
1183 except IOError, e:
1184 if hasattr(e, 'reason') and have_socket_timeout and \
1185 isinstance(e.reason, TimeoutError):
1186 raise URLGrabError(12, _('Timeout: %s') % (e, ))
1187 else:
1188 raise URLGrabError(4, _('IOError: %s') % (e, ))
1189 except OSError, e:
1190 raise URLGrabError(5, _('OSError: %s') % (e, ))
1191 except HTTPException, e:
1192 raise URLGrabError(7, _('HTTP Exception (%s): %s') % \
1193 (e.__class__.__name__, e))
1194 else:
1195 return (fo, hdr)
1196
1197 def _do_grab(self):
1198 """dump the file to self.filename."""
1199 if self.append: new_fo = open(self.filename, 'ab')
1200 else: new_fo = open(self.filename, 'wb')
1201 bs = 1024*8
1202 size = 0
1203
1204 block = self.read(bs)
1205 size = size + len(block)
1206 while block:
1207 new_fo.write(block)
1208 block = self.read(bs)
1209 size = size + len(block)
1210
1211 new_fo.close()
1212 try:
1213 modified_tuple = self.hdr.getdate_tz('last-modified')
1214 modified_stamp = rfc822.mktime_tz(modified_tuple)
1215 os.utime(self.filename, (modified_stamp, modified_stamp))
1216 except (TypeError,), e: pass
1217
1218 return size
1219
1220 def _fill_buffer(self, amt=None):
1221 """fill the buffer to contain at least 'amt' bytes by reading
1222 from the underlying file object. If amt is None, then it will
1223 read until it gets nothing more. It updates the progress meter
1224 and throttles after every self._rbufsize bytes."""
1225 # the _rbuf test is only in this first 'if' for speed. It's not
1226 # logically necessary
1227 if self._rbuf and not amt is None:
1228 L = len(self._rbuf)
1229 if amt > L:
1230 amt = amt - L
1231 else:
1232 return
1233
1234 # if we've made it here, then we don't have enough in the buffer
1235 # and we need to read more.
1236
1237 buf = [self._rbuf]
1238 bufsize = len(self._rbuf)
1239 while amt is None or amt:
1240 # first, delay if necessary for throttling reasons
1241 if self.opts.raw_throttle():
1242 diff = self._tsize/self.opts.raw_throttle() - \
1243 (time.time() - self._ttime)
1244 if diff > 0: time.sleep(diff)
1245 self._ttime = time.time()
1246
1247 # now read some data, up to self._rbufsize
1248 if amt is None: readamount = self._rbufsize
1249 else: readamount = min(amt, self._rbufsize)
1250 try:
1251 new = self.fo.read(readamount)
1252 except socket.error, e:
1253 raise URLGrabError(4, _('Socket Error: %s') % (e, ))
1254 except TimeoutError, e:
1255 raise URLGrabError(12, _('Timeout: %s') % (e, ))
1256 except IOError, e:
1257 raise URLGrabError(4, _('IOError: %s') %(e,))
1258 newsize = len(new)
1259 if not newsize: break # no more to read
1260
1261 if amt: amt = amt - newsize
1262 buf.append(new)
1263 bufsize = bufsize + newsize
1264 self._tsize = newsize
1265 self._amount_read = self._amount_read + newsize
1266 if self.opts.progress_obj:
1267 self.opts.progress_obj.update(self._amount_read)
1268
1269 self._rbuf = string.join(buf, '')
1270 return
1271
1272 def read(self, amt=None):
1273 self._fill_buffer(amt)
1274 if amt is None:
1275 s, self._rbuf = self._rbuf, ''
1276 else:
1277 s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
1278 return s
1279
1280 def readline(self, limit=-1):
1281 i = string.find(self._rbuf, '\n')
1282 while i < 0 and not (0 < limit <= len(self._rbuf)):
1283 L = len(self._rbuf)
1284 self._fill_buffer(L + self._rbufsize)
1285 if not len(self._rbuf) > L: break
1286 i = string.find(self._rbuf, '\n', L)
1287
1288 if i < 0: i = len(self._rbuf)
1289 else: i = i+1
1290 if 0 <= limit < len(self._rbuf): i = limit
1291
1292 s, self._rbuf = self._rbuf[:i], self._rbuf[i:]
1293 return s
1294
1295 def close(self):
1296 if self.opts.progress_obj:
1297 self.opts.progress_obj.end(self._amount_read)
1298 self.fo.close()
1299 if self.opts.close_connection:
1300 try: self.fo.close_connection()
1301 except: pass
1302
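# Illustrative sketch (editor's aside): the throttling delay computed inside
# _fill_buffer above, pulled out as plain arithmetic. 'raw_throttle' is the
# effective cap in bytes/second; 'tsize' is the size of the last read and
# 'elapsed' the wall time it took.
def _example_throttle_delay(tsize, raw_throttle, elapsed):
    # sleep for however much longer the last chunk *should* have taken
    diff = float(tsize) / raw_throttle - elapsed
    return max(diff, 0.0)
# e.g. an 8192-byte read under a 4096 B/s cap that arrived in 1.0s came in
# 1.0s too fast: _example_throttle_delay(8192, 4096, 1.0) == 1.0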
1303_handler_cache = []
1304def CachedOpenerDirector(ssl_factory = None, *handlers):
1305 for (cached_handlers, opener) in _handler_cache:
1306 if cached_handlers == handlers:
1307 for handler in opener.handlers:
1308 handler.add_parent(opener)
1309 return opener
1310 if not ssl_factory:
1311 ssl_factory = sslfactory.get_factory()
1312 opener = ssl_factory.create_opener(*handlers)
1313 _handler_cache.append( (handlers, opener) )
1314 return opener
1315
1316_proxy_cache = []
1317def CachedProxyHandler(proxies):
1318 for (pdict, handler) in _proxy_cache:
1319 if pdict == proxies:
1320 if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies)
1321 break
1322 else:
1323 for k, v in proxies.items():
1324 utype, url = urllib.splittype(v)
1325 host, other = urllib.splithost(url)
1326 if (utype is None) or (host is None):
1327 raise URLGrabError(13, _('Bad proxy URL: %s') % v)
1328
1329 if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies)
1330 handler = urllib2.ProxyHandler(proxies)
1331 _proxy_cache.append( (proxies, handler) )
1332 return handler
1333
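# Illustrative sketch (editor's aside): per the comment in _get_opener above,
# a proxy handler must come before the other handlers when building an opener
# by hand. The proxy URL is hypothetical.
def _example_proxied_opener():
    proxies = {'http': 'http://proxy.example.com:3128'}
    handlers = [CachedProxyHandler(proxies),  # must be first in the list
                urllib2.HTTPHandler()]
    return urllib2.build_opener(*handlers)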
1334#####################################################################
1335# DEPRECATED FUNCTIONS
1336def set_throttle(new_throttle):
1337 """Deprecated. Use: default_grabber.throttle = new_throttle"""
1338 default_grabber.throttle = new_throttle
1339
1340def set_bandwidth(new_bandwidth):
1341 """Deprecated. Use: default_grabber.bandwidth = new_bandwidth"""
1342 default_grabber.bandwidth = new_bandwidth
1343
1344def set_progress_obj(new_progress_obj):
1345 """Deprecated. Use: default_grabber.progress_obj = new_progress_obj"""
1346 default_grabber.progress_obj = new_progress_obj
1347
1348def set_user_agent(new_user_agent):
1349 """Deprecated. Use: default_grabber.user_agent = new_user_agent"""
1350 default_grabber.user_agent = new_user_agent
1351
1352def retrygrab(url, filename=None, copy_local=0, close_connection=0,
1353 progress_obj=None, throttle=None, bandwidth=None,
1354 numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None):
1355 """Deprecated. Use: urlgrab() with the retry arg instead"""
1356 kwargs = {'copy_local' : copy_local,
1357 'close_connection' : close_connection,
1358 'progress_obj' : progress_obj,
1359 'throttle' : throttle,
1360 'bandwidth' : bandwidth,
1361 'retry' : numtries,
1362 'retrycodes' : retrycodes,
1363 'checkfunc' : checkfunc
1364 }
1365 return urlgrab(url, filename, **kwargs)
1366
1367
1368#####################################################################
1369# TESTING
1370def _main_test():
1371 import sys
1372 try: url, filename = sys.argv[1:3]
1373 except ValueError:
1374 print 'usage:', sys.argv[0], \
1375 '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
1376 sys.exit()
1377
1378 kwargs = {}
1379 for a in sys.argv[3:]:
1380 k, v = string.split(a, '=', 1)
1381 kwargs[k] = int(v)
1382
1383 set_throttle(1.0)
1384 set_bandwidth(32 * 1024)
1385 print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
1386 default_grabber.bandwidth)
1387
1388 try: from progress import text_progress_meter
1389 except ImportError, e: pass
1390 else: kwargs['progress_obj'] = text_progress_meter()
1391
1392 try: name = apply(urlgrab, (url, filename), kwargs)
1393 except URLGrabError, e: print e
1394 else: print 'LOCAL FILE:', name
1395
1396
1397def _retry_test():
1398 import sys
1399 try: url, filename = sys.argv[1:3]
1400 except ValueError:
1401 print 'usage:', sys.argv[0], \
1402 '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
1403 sys.exit()
1404
1405 kwargs = {}
1406 for a in sys.argv[3:]:
1407 k, v = string.split(a, '=', 1)
1408 kwargs[k] = int(v)
1409
1410 try: from progress import text_progress_meter
1411 except ImportError, e: pass
1412 else: kwargs['progress_obj'] = text_progress_meter()
1413
1414 def cfunc(filename, hello, there='foo'):
1415 print hello, there
1416 import random
1417 rnum = random.random()
1418 if rnum < .5:
1419 print 'forcing retry'
1420 raise URLGrabError(-1, 'forcing retry')
1421 if rnum < .75:
1422 print 'forcing failure'
1423 raise URLGrabError(-2, 'forcing immediate failure')
1424 print 'success'
1425 return
1426
1427 kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
1428 try: name = apply(retrygrab, (url, filename), kwargs)
1429 except URLGrabError, e: print e
1430 else: print 'LOCAL FILE:', name
1431
1432def _file_object_test(filename=None):
1433 import random, cStringIO, sys
1434 if filename is None:
1435 filename = __file__
1436 print 'using file "%s" for comparisons' % filename
1437 fo = open(filename)
1438 s_input = fo.read()
1439 fo.close()
1440
1441 for testfunc in [_test_file_object_smallread,
1442 _test_file_object_readall,
1443 _test_file_object_readline,
1444 _test_file_object_readlines]:
1445 fo_input = cStringIO.StringIO(s_input)
1446 fo_output = cStringIO.StringIO()
1447 wrapper = URLGrabberFileObject(fo_input, None, 0)
1448 print 'testing %-30s ' % testfunc.__name__,
1449 testfunc(wrapper, fo_output)
1450 s_output = fo_output.getvalue()
1451 if s_output == s_input: print 'passed'
1452 else: print 'FAILED'
1453
1454def _test_file_object_smallread(wrapper, fo_output):
1455 while 1:
1456 s = wrapper.read(23)
1457 fo_output.write(s)
1458 if not s: return
1459
1460def _test_file_object_readall(wrapper, fo_output):
1461 s = wrapper.read()
1462 fo_output.write(s)
1463
1464def _test_file_object_readline(wrapper, fo_output):
1465 while 1:
1466 s = wrapper.readline()
1467 fo_output.write(s)
1468 if not s: return
1469
1470def _test_file_object_readlines(wrapper, fo_output):
1471 li = wrapper.readlines()
1472 fo_output.write(string.join(li, ''))
1473
1474if __name__ == '__main__':
1475 _main_test()
1476 _retry_test()
1477 _file_object_test('test')
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
deleted file mode 100644
index 71393e2b8d..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
+++ /dev/null
@@ -1,617 +0,0 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
21
22>>> import urllib2
23>>> from keepalive import HTTPHandler
24>>> keepalive_handler = HTTPHandler()
25>>> opener = urllib2.build_opener(keepalive_handler)
26>>> urllib2.install_opener(opener)
27>>>
28>>> fo = urllib2.urlopen('http://www.python.org')
29
30If a connection to a given host is requested, and all of the existing
31connections are still in use, another connection will be opened. If
32the handler tries to use an existing connection but it fails in some
33way, it will be closed and removed from the pool.
34
35To remove the handler, simply re-run build_opener with no arguments, and
36install that opener.
37
38You can explicitly close connections by using the close_connection()
39method of the returned file-like object (described below) or you can
40use the handler methods:
41
42 close_connection(host)
43 close_all()
44 open_connections()
45
46NOTE: using the close_connection and close_all methods of the handler
47should be done with care when using multiple threads.
48 * there is nothing that prevents another thread from creating new
49 connections immediately after connections are closed
50 * no checks are done to prevent in-use connections from being closed
51
52>>> keepalive_handler.close_all()
53
54EXTRA ATTRIBUTES AND METHODS
55
56 Upon a status of 200, the object returned has a few additional
57 attributes and methods, which should not be used if you want to
58 remain consistent with the normal urllib2-returned objects:
59
60 close_connection() - close the connection to the host
61 readlines() - you know, readlines()
62    status - the return status (e.g. 404)
63    reason - English translation of status (e.g. 'File not found')
64
65 If you want the best of both worlds, use this inside an
66 AttributeError-catching try:
67
68 >>> try: status = fo.status
69 >>> except AttributeError: status = None
70
71 Unfortunately, these are ONLY there if status == 200, so it's not
72 easy to distinguish between non-200 responses. The reason is that
73 urllib2 tries to do clever things with error codes 301, 302, 401,
74 and 407, and it wraps the object upon return.
75
76 For python versions earlier than 2.4, you can avoid this fancy error
77 handling by setting the module-level global HANDLE_ERRORS to zero.
78 You see, prior to 2.4, it's the HTTP Handler's job to determine what
79 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
80 means "pass everything up". In python 2.4, however, this job no
81 longer belongs to the HTTP Handler and is now done by a NEW handler,
82 HTTPErrorProcessor. Here's the bottom line:
83
84 python version < 2.4
85 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
86 errors
87 HANDLE_ERRORS == 0 pass everything up, error processing is
88 left to the calling code
89 python version >= 2.4
90 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
91 HANDLE_ERRORS == 0 (default) pass everything up, let the
92 other handlers (specifically,
93 HTTPErrorProcessor) decide what to do
94
95 In practice, setting the variable either way makes little difference
96 in python 2.4, so for the most consistent behavior across versions,
97 you probably just want to use the defaults, which will give you
98 exceptions on errors.
99
100"""
101
102# $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $
103
104import urllib2
105import httplib
106import socket
107import thread
108
109DEBUG = None
110
111import sslfactory
112
113import sys
114if sys.version_info < (2, 4): HANDLE_ERRORS = 1
115else: HANDLE_ERRORS = 0
116
117class ConnectionManager:
118 """
119 The connection manager must be able to:
120      * keep track of all existing connections
121 """
122 def __init__(self):
123 self._lock = thread.allocate_lock()
124 self._hostmap = {} # map hosts to a list of connections
125 self._connmap = {} # map connections to host
126 self._readymap = {} # map connection to ready state
127
128 def add(self, host, connection, ready):
129 self._lock.acquire()
130 try:
131 if not self._hostmap.has_key(host): self._hostmap[host] = []
132 self._hostmap[host].append(connection)
133 self._connmap[connection] = host
134 self._readymap[connection] = ready
135 finally:
136 self._lock.release()
137
138 def remove(self, connection):
139 self._lock.acquire()
140 try:
141 try:
142 host = self._connmap[connection]
143 except KeyError:
144 pass
145 else:
146 del self._connmap[connection]
147 del self._readymap[connection]
148 self._hostmap[host].remove(connection)
149 if not self._hostmap[host]: del self._hostmap[host]
150 finally:
151 self._lock.release()
152
153 def set_ready(self, connection, ready):
154 try: self._readymap[connection] = ready
155 except KeyError: pass
156
157 def get_ready_conn(self, host):
158 conn = None
159 self._lock.acquire()
160 try:
161 if self._hostmap.has_key(host):
162 for c in self._hostmap[host]:
163 if self._readymap[c]:
164 self._readymap[c] = 0
165 conn = c
166 break
167 finally:
168 self._lock.release()
169 return conn
170
171 def get_all(self, host=None):
172 if host:
173 return list(self._hostmap.get(host, []))
174 else:
175 return dict(self._hostmap)
176
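# Illustrative sketch (editor's aside): the ConnectionManager bookkeeping
# exercised on its own. Any object can stand in for a connection here; the
# host string is hypothetical.
def _example_connection_manager():
    cm = ConnectionManager()
    conn = object()
    cm.add('example.com:80', conn, 1)
    # get_ready_conn returns the connection and marks it busy
    assert cm.get_ready_conn('example.com:80') is conn
    assert cm.get_ready_conn('example.com:80') is None
    cm.set_ready(conn, 1)      # hand it back to the pool
    cm.remove(conn)            # or drop it entirely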
177class KeepAliveHandler:
178 def __init__(self):
179 self._cm = ConnectionManager()
180
181 #### Connection Management
182 def open_connections(self):
183 """return a list of connected hosts and the number of connections
184 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
185 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
186
187 def close_connection(self, host):
188 """close connection(s) to <host>
189        host is the host:port spec, e.g. 'www.cnn.com:8080', as passed in.
190 no error occurs if there is no connection to that host."""
191 for h in self._cm.get_all(host):
192 self._cm.remove(h)
193 h.close()
194
195 def close_all(self):
196 """close all open connections"""
197 for host, conns in self._cm.get_all().items():
198 for h in conns:
199 self._cm.remove(h)
200 h.close()
201
202 def _request_closed(self, request, host, connection):
203        """tells us that this request is now closed and that the
204 connection is ready for another request"""
205 self._cm.set_ready(connection, 1)
206
207 def _remove_connection(self, host, connection, close=0):
208 if close: connection.close()
209 self._cm.remove(connection)
210
211 #### Transaction Execution
212 def do_open(self, req):
213 host = req.get_host()
214 if not host:
215 raise urllib2.URLError('no host given')
216
217 try:
218 h = self._cm.get_ready_conn(host)
219 while h:
220 r = self._reuse_connection(h, req, host)
221
222 # if this response is non-None, then it worked and we're
223 # done. Break out, skipping the else block.
224 if r: break
225
226 # connection is bad - possibly closed by server
227 # discard it and ask for the next free connection
228 h.close()
229 self._cm.remove(h)
230 h = self._cm.get_ready_conn(host)
231 else:
232 # no (working) free connections were found. Create a new one.
233 h = self._get_connection(host)
234 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
235 host, id(h))
236 self._cm.add(host, h, 0)
237 self._start_transaction(h, req)
238 r = h.getresponse()
239 except (socket.error, httplib.HTTPException), err:
240 raise urllib2.URLError(err)
241
242 # if not a persistent connection, don't try to reuse it
243 if r.will_close: self._cm.remove(h)
244
245 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
246 r._handler = self
247 r._host = host
248 r._url = req.get_full_url()
249 r._connection = h
250 r.code = r.status
251 r.headers = r.msg
252 r.msg = r.reason
253
254 if r.status == 200 or not HANDLE_ERRORS:
255 return r
256 else:
257 return self.parent.error('http', req, r,
258 r.status, r.msg, r.headers)
259
260 def _reuse_connection(self, h, req, host):
261 """start the transaction with a re-used connection
262 return a response object (r) upon success or None on failure.
263 This DOES not close or remove bad connections in cases where
264 it returns. However, if an unexpected exception occurs, it
265 will close and remove the connection before re-raising.
266 """
267 try:
268 self._start_transaction(h, req)
269 r = h.getresponse()
270 # note: just because we got something back doesn't mean it
271 # worked. We'll check the version below, too.
272 except (socket.error, httplib.HTTPException):
273 r = None
274 except:
275 # adding this block just in case we've missed
276            # something. We will still raise the exception, but
277            # let's try to close the connection and remove it
278 # first. We previously got into a nasty loop
279 # where an exception was uncaught, and so the
280 # connection stayed open. On the next try, the
281 # same exception was raised, etc. The tradeoff is
282 # that it's now possible this call will raise
283 # a DIFFERENT exception
284 if DEBUG: DEBUG.error("unexpected exception - closing " + \
285 "connection to %s (%d)", host, id(h))
286 self._cm.remove(h)
287 h.close()
288 raise
289
290 if r is None or r.version == 9:
291 # httplib falls back to assuming HTTP 0.9 if it gets a
292 # bad header back. This is most likely to happen if
293 # the socket has been closed by the server since we
294 # last used the connection.
295 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
296 host, id(h))
297 r = None
298 else:
299 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
300
301 return r
302
303 def _start_transaction(self, h, req):
304 try:
305 if req.has_data():
306 data = req.get_data()
307 h.putrequest('POST', req.get_selector())
308 if not req.headers.has_key('Content-type'):
309 h.putheader('Content-type',
310 'application/x-www-form-urlencoded')
311 if not req.headers.has_key('Content-length'):
312 h.putheader('Content-length', '%d' % len(data))
313 else:
314 h.putrequest('GET', req.get_selector())
315 except (socket.error, httplib.HTTPException), err:
316 raise urllib2.URLError(err)
317
318 for args in self.parent.addheaders:
319 h.putheader(*args)
320 for k, v in req.headers.items():
321 h.putheader(k, v)
322 h.endheaders()
323 if req.has_data():
324 h.send(data)
325
326 def _get_connection(self, host):
327        raise NotImplementedError
328
329class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
330 def __init__(self):
331 KeepAliveHandler.__init__(self)
332
333 def http_open(self, req):
334 return self.do_open(req)
335
336 def _get_connection(self, host):
337 return HTTPConnection(host)
338
339class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
340 def __init__(self, ssl_factory=None):
341 KeepAliveHandler.__init__(self)
342 if not ssl_factory:
343 ssl_factory = sslfactory.get_factory()
344 self._ssl_factory = ssl_factory
345
346 def https_open(self, req):
347 return self.do_open(req)
348
349 def _get_connection(self, host):
350 return self._ssl_factory.get_https_connection(host)
351
352class HTTPResponse(httplib.HTTPResponse):
353 # we need to subclass HTTPResponse in order to
354 # 1) add readline() and readlines() methods
355 # 2) add close_connection() methods
356 # 3) add info() and geturl() methods
357
358 # in order to add readline(), read must be modified to deal with a
359 # buffer. example: readline must read a buffer and then spit back
360 # one line at a time. The only real alternative is to read one
361 # BYTE at a time (ick). Once something has been read, it can't be
362 # put back (ok, maybe it can, but that's even uglier than this),
363 # so if you THEN do a normal read, you must first take stuff from
364 # the buffer.
365
366    # the read method wraps the original to accommodate buffering,
367 # although read() never adds to the buffer.
368 # Both readline and readlines have been stolen with almost no
369 # modification from socket.py
370
371
372 def __init__(self, sock, debuglevel=0, strict=0, method=None):
373 if method: # the httplib in python 2.3 uses the method arg
374 httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
375 else: # 2.2 doesn't
376 httplib.HTTPResponse.__init__(self, sock, debuglevel)
377 self.fileno = sock.fileno
378 self.code = None
379 self._rbuf = ''
380 self._rbufsize = 8096
381 self._handler = None # inserted by the handler later
382 self._host = None # (same)
383 self._url = None # (same)
384 self._connection = None # (same)
385
386 _raw_read = httplib.HTTPResponse.read
387
388 def close(self):
389 if self.fp:
390 self.fp.close()
391 self.fp = None
392 if self._handler:
393 self._handler._request_closed(self, self._host,
394 self._connection)
395
396 def close_connection(self):
397 self._handler._remove_connection(self._host, self._connection, close=1)
398 self.close()
399
400 def info(self):
401 return self.headers
402
403 def geturl(self):
404 return self._url
405
406 def read(self, amt=None):
407 # the _rbuf test is only in this first if for speed. It's not
408 # logically necessary
409 if self._rbuf and not amt is None:
410 L = len(self._rbuf)
411 if amt > L:
412 amt -= L
413 else:
414 s = self._rbuf[:amt]
415 self._rbuf = self._rbuf[amt:]
416 return s
417
418 s = self._rbuf + self._raw_read(amt)
419 self._rbuf = ''
420 return s
421
422 def readline(self, limit=-1):
423 data = ""
424 i = self._rbuf.find('\n')
425 while i < 0 and not (0 < limit <= len(self._rbuf)):
426 new = self._raw_read(self._rbufsize)
427 if not new: break
428 i = new.find('\n')
429 if i >= 0: i = i + len(self._rbuf)
430 self._rbuf = self._rbuf + new
431 if i < 0: i = len(self._rbuf)
432 else: i = i+1
433 if 0 <= limit < len(self._rbuf): i = limit
434 data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
435 return data
436
437    def readlines(self, sizehint=0):
438        total = 0
439        lines = []
440        while 1:
441            line = self.readline()
442            if not line: break
443            lines.append(line)
444            total += len(line)
445            if sizehint and total >= sizehint:
446                break
447        return lines
448
449
450class HTTPConnection(httplib.HTTPConnection):
451 # use the modified response class
452 response_class = HTTPResponse
453
454class HTTPSConnection(httplib.HTTPSConnection):
455 response_class = HTTPResponse
456
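# Illustrative sketch (editor's aside): installing the keepalive handler and
# reading the extra 'status' attribute defensively, as the module docstring
# above recommends.
def _example_keepalive():
    handler = HTTPHandler()
    opener = urllib2.build_opener(handler)
    urllib2.install_opener(opener)
    fo = urllib2.urlopen('http://www.python.org')
    try: status = fo.status        # only present on a 200 response
    except AttributeError: status = None
    fo.close()
    handler.close_all()
    return status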
457#########################################################################
458##### TEST FUNCTIONS
459#########################################################################
460
461def error_handler(url):
462 global HANDLE_ERRORS
463 orig = HANDLE_ERRORS
464 keepalive_handler = HTTPHandler()
465 opener = urllib2.build_opener(keepalive_handler)
466 urllib2.install_opener(opener)
467 pos = {0: 'off', 1: 'on'}
468 for i in (0, 1):
469 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
470 HANDLE_ERRORS = i
471 try:
472 fo = urllib2.urlopen(url)
473 foo = fo.read()
474 fo.close()
475 try: status, reason = fo.status, fo.reason
476 except AttributeError: status, reason = None, None
477 except IOError, e:
478 print " EXCEPTION: %s" % e
479 raise
480 else:
481 print " status = %s, reason = %s" % (status, reason)
482 HANDLE_ERRORS = orig
483 hosts = keepalive_handler.open_connections()
484 print "open connections:", hosts
485 keepalive_handler.close_all()
486
487def continuity(url):
488 import md5
489 format = '%25s: %s'
490
491 # first fetch the file with the normal http handler
492 opener = urllib2.build_opener()
493 urllib2.install_opener(opener)
494 fo = urllib2.urlopen(url)
495 foo = fo.read()
496 fo.close()
497 m = md5.new(foo)
498 print format % ('normal urllib', m.hexdigest())
499
500 # now install the keepalive handler and try again
501 opener = urllib2.build_opener(HTTPHandler())
502 urllib2.install_opener(opener)
503
504 fo = urllib2.urlopen(url)
505 foo = fo.read()
506 fo.close()
507 m = md5.new(foo)
508 print format % ('keepalive read', m.hexdigest())
509
510 fo = urllib2.urlopen(url)
511 foo = ''
512 while 1:
513 f = fo.readline()
514 if f: foo = foo + f
515 else: break
516 fo.close()
517 m = md5.new(foo)
518 print format % ('keepalive readline', m.hexdigest())
519
520def comp(N, url):
521 print ' making %i connections to:\n %s' % (N, url)
522
523 sys.stdout.write(' first using the normal urllib handlers')
524 # first use normal opener
525 opener = urllib2.build_opener()
526 urllib2.install_opener(opener)
527 t1 = fetch(N, url)
528 print ' TIME: %.3f s' % t1
529
530 sys.stdout.write(' now using the keepalive handler ')
531 # now install the keepalive handler and try again
532 opener = urllib2.build_opener(HTTPHandler())
533 urllib2.install_opener(opener)
534 t2 = fetch(N, url)
535 print ' TIME: %.3f s' % t2
536 print ' improvement factor: %.2f' % (t1/t2, )
537
538def fetch(N, url, delay=0):
539 import time
540 lens = []
541 starttime = time.time()
542 for i in range(N):
543 if delay and i > 0: time.sleep(delay)
544 fo = urllib2.urlopen(url)
545 foo = fo.read()
546 fo.close()
547 lens.append(len(foo))
548 diff = time.time() - starttime
549
550 j = 0
551 for i in lens[1:]:
552 j = j + 1
553 if not i == lens[0]:
554 print "WARNING: inconsistent length on read %i: %i" % (j, i)
555
556 return diff
557
558def test_timeout(url):
559 global DEBUG
560 dbbackup = DEBUG
561 class FakeLogger:
562 def debug(self, msg, *args): print msg % args
563 info = warning = error = debug
564 DEBUG = FakeLogger()
565 print " fetching the file to establish a connection"
566 fo = urllib2.urlopen(url)
567 data1 = fo.read()
568 fo.close()
569
570 i = 20
571 print " waiting %i seconds for the server to close the connection" % i
572 while i > 0:
573 sys.stdout.write('\r %2i' % i)
574 sys.stdout.flush()
575 time.sleep(1)
576 i -= 1
577 sys.stderr.write('\r')
578
579 print " fetching the file a second time"
580 fo = urllib2.urlopen(url)
581 data2 = fo.read()
582 fo.close()
583
584 if data1 == data2:
585 print ' data are identical'
586 else:
587 print ' ERROR: DATA DIFFER'
588
589 DEBUG = dbbackup
590
591
592def test(url, N=10):
593    print "checking error handler (do this on a non-200)"
594 try: error_handler(url)
595 except IOError, e:
596 print "exiting - exception will prevent further tests"
597 sys.exit()
598 print
599 print "performing continuity test (making sure stuff isn't corrupted)"
600 continuity(url)
601 print
602 print "performing speed comparison"
603 comp(N, url)
604 print
605 print "performing dropped-connection check"
606 test_timeout(url)
607
608if __name__ == '__main__':
609 import time
610 import sys
611 try:
612 N = int(sys.argv[1])
613 url = sys.argv[2]
614 except:
615 print "%s <integer> <url>" % sys.argv[0]
616 else:
617 test(url, N)
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
deleted file mode 100644
index 9664c6b5c5..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
+++ /dev/null
@@ -1,458 +0,0 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20"""Module for downloading files from a pool of mirrors
21
22DESCRIPTION
23
24 This module provides support for downloading files from a pool of
25 mirrors with configurable failover policies. To a large extent, the
26 failover policy is chosen by using different classes derived from
27 the main class, MirrorGroup.
28
29 Instances of MirrorGroup (and cousins) act very much like URLGrabber
30 instances in that they have urlread, urlgrab, and urlopen methods.
31    They can, therefore, be used in very similar ways.
32
33 from urlgrabber.grabber import URLGrabber
34 from urlgrabber.mirror import MirrorGroup
35 gr = URLGrabber()
36 mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
37 'http://bar.org/maybe/somewhere/else/',
38                          'ftp://baz.net/some/other/place/entirely/'])
39 mg.urlgrab('relative/path.zip')
40
41 The assumption is that all mirrors are identical AFTER the base urls
42 specified, so that any mirror can be used to fetch any file.
43
44FAILOVER
45
46 The failover mechanism is designed to be customized by subclassing
47 from MirrorGroup to change the details of the behavior. In general,
48 the classes maintain a master mirror list and a "current mirror"
49 index. When a download is initiated, a copy of this list and index
50 is created for that download only. The specific failover policy
51 depends on the class used, and so is documented in the class
52 documentation. Note that ANY behavior of the class can be
53 overridden, so any failover policy at all is possible (although
54 you may need to change the interface in extreme cases).
55
56CUSTOMIZATION
57
58 Most customization of a MirrorGroup object is done at instantiation
59 time (or via subclassing). There are four major types of
60 customization:
61
62 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
63 used (by default... see #2) for the grabs, so options to it
64 apply for the url-fetching
65
66 2) Custom mirror list - Mirror lists can simply be a list of
67       mirror strings (as shown in the example above), but each can
68 also be a dict, allowing for more options. For example, the
69 first mirror in the list above could also have been:
70
71 {'mirror': 'http://foo.com/some/directory/',
72 'grabber': <a custom grabber to be used for this mirror>,
73 'kwargs': { <a dict of arguments passed to the grabber> }}
74
75 All mirrors are converted to this format internally. If
76 'grabber' is omitted, the default grabber will be used. If
77 kwargs are omitted, then (duh) they will not be used.
78
79 3) Pass keyword arguments when instantiating the mirror group.
80 See, for example, the failure_callback argument.
81
82 4) Finally, any kwargs passed in for the specific file (to the
83 urlgrab method, for example) will be folded in. The options
84 passed into the grabber's urlXXX methods will override any
85 options specified in a custom mirror dict.
86
87"""
88
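# Illustrative sketch (editor's aside): a MirrorGroup built with the
# per-mirror dict form described above. Grabber construction is elided;
# mirror URLs and the timeout value are hypothetical.
def _example_mirror_group(grabber):
    mirrors = ['http://foo.com/some/directory/',
               {'mirror': 'http://bar.org/maybe/somewhere/else/',
                'kwargs': {'timeout': 30.0}}]
    mg = MirrorGroup(grabber, mirrors)
    return mg.urlgrab('relative/path.zip')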
89# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $
90
91import random
92import thread # needed for locking to make this threadsafe
93
94from grabber import URLGrabError, CallbackObject, DEBUG
95
96try:
97 from i18n import _
98except ImportError, msg:
99 def _(st): return st
100
101class GrabRequest:
102 """This is a dummy class used to hold information about the specific
103 request. For example, a single file. By maintaining this information
104 separately, we can accomplish two things:
105
106 1) make it a little easier to be threadsafe
107 2) have request-specific parameters
108 """
109 pass
110
111class MirrorGroup:
112 """Base Mirror class
113
114 Instances of this class are built with a grabber object and a list
115 of mirrors. Then all calls to urlXXX should be passed relative urls.
116 The requested file will be searched for on the first mirror. If the
117 grabber raises an exception (possibly after some retries) then that
118 mirror will be removed from the list, and the next will be attempted.
119 If all mirrors are exhausted, then an exception will be raised.
120
121 MirrorGroup has the following failover policy:
122
123 * downloads begin with the first mirror
124
125 * by default (see default_action below) a failure (after retries)
126 causes it to increment the local AND master indices. Also,
127 the current mirror is removed from the local list (but NOT the
128 master list - the mirror can potentially be used for other
129 files)
130
131 * if the local list is ever exhausted, a URLGrabError will be
132 raised (errno=256, no more mirrors)
133
134 OPTIONS
135
136 In addition to the required arguments "grabber" and "mirrors",
137 MirrorGroup also takes the following optional arguments:
138
139 default_action
140
141 A dict that describes the actions to be taken upon failure
142 (after retries). default_action can contain any of the
143 following keys (shown here with their default values):
144
145 default_action = {'increment': 1,
146 'increment_master': 1,
147 'remove': 1,
148 'remove_master': 0,
149 'fail': 0}
150
151 In this context, 'increment' means "use the next mirror" and
152 'remove' means "never use this mirror again". The two
153 'master' values refer to the instance-level mirror list (used
154 for all files), whereas the non-master values refer to the
155 current download only.
156
157 The 'fail' option will cause immediate failure by re-raising
158 the exception and no further attempts to get the current
159 download.
160
161 This dict can be set at instantiation time,
162 mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
163 at method-execution time (only applies to current fetch),
164 filename = mg.urlgrab(url, default_action={'increment': 0})
165 or by returning an action dict from the failure_callback
166 return {'fail':0}
167 in increasing precedence.
168
169 If all three of these were done, the net result would be:
170 {'increment': 0, # set in method
171 'increment_master': 1, # class default
172 'remove': 1, # class default
173 'remove_master': 0, # class default
174 'fail': 0} # set at instantiation, reset
175 # from callback
176
177 failure_callback
178
179 this is a callback that will be called when a mirror "fails",
180 meaning the grabber raises some URLGrabError. If this is a
181 tuple, it is interpreted to be of the form (cb, args, kwargs)
182 where cb is the actual callable object (function, method,
183 etc). Otherwise, it is assumed to be the callable object
184 itself. The callback will be passed a grabber.CallbackObject
185 instance along with args and kwargs (if present). The following
186      attributes are defined within the instance:
187
188 obj.exception = < exception that was raised >
189 obj.mirror = < the mirror that was tried >
190 obj.relative_url = < url relative to the mirror >
191 obj.url = < full url that failed >
192 # .url is just the combination of .mirror
193 # and .relative_url
194
195 The failure callback can return an action dict, as described
196 above.
197
198 Like default_action, the failure_callback can be set at
199 instantiation time or when the urlXXX method is called. In
200 the latter case, it applies only for that fetch.
201
202 The callback can re-raise the exception quite easily. For
203 example, this is a perfectly adequate callback function:
204
205 def callback(obj): raise obj.exception
206
207 WARNING: do not save the exception object (or the
208 CallbackObject instance). As they contain stack frame
209 references, they can lead to circular references.
210
211 Notes:
212 * The behavior can be customized by deriving and overriding the
213 'CONFIGURATION METHODS'
214 * The 'grabber' instance is kept as a reference, not copied.
215 Therefore, the grabber instance can be modified externally
216 and changes will take effect immediately.
217 """
218
219 # notes on thread-safety:
220
221 # A GrabRequest should never be shared by multiple threads because
222 # it's never saved inside the MG object and never returned outside it.
223 # therefore, it should be safe to access/modify grabrequest data
224 # without a lock. However, accessing the mirrors and _next attributes
225 # of the MG itself must be done when locked to prevent (for example)
226 # removal of the wrong mirror.
227
228 ##############################################################
229 # CONFIGURATION METHODS - intended to be overridden to
230 # customize behavior
231 def __init__(self, grabber, mirrors, **kwargs):
232 """Initialize the MirrorGroup object.
233
234 REQUIRED ARGUMENTS
235
236 grabber - URLGrabber instance
237 mirrors - a list of mirrors
238
239 OPTIONAL ARGUMENTS
240
241 failure_callback - callback to be used when a mirror fails
242 default_action - dict of failure actions
243
244 See the module-level and class level documentation for more
245 details.
246 """
247
248 # OVERRIDE IDEAS:
249 # shuffle the list to randomize order
250 self.grabber = grabber
251 self.mirrors = self._parse_mirrors(mirrors)
252 self._next = 0
253 self._lock = thread.allocate_lock()
254 self.default_action = None
255 self._process_kwargs(kwargs)
256
257 # if these values are found in **kwargs passed to one of the urlXXX
258 # methods, they will be stripped before getting passed on to the
259 # grabber
260 options = ['default_action', 'failure_callback']
261
262 def _process_kwargs(self, kwargs):
263 self.failure_callback = kwargs.get('failure_callback')
264 self.default_action = kwargs.get('default_action')
265
266 def _parse_mirrors(self, mirrors):
267 parsed_mirrors = []
268 for m in mirrors:
269 if type(m) == type(''): m = {'mirror': m}
270 parsed_mirrors.append(m)
271 return parsed_mirrors
272
273 def _load_gr(self, gr):
274 # OVERRIDE IDEAS:
275 # shuffle gr list
276 self._lock.acquire()
277 gr.mirrors = list(self.mirrors)
278 gr._next = self._next
279 self._lock.release()
280
281 def _get_mirror(self, gr):
282 # OVERRIDE IDEAS:
283 # return a random mirror so that multiple mirrors get used
284 # even without failures.
285 if not gr.mirrors:
286 raise URLGrabError(256, _('No more mirrors to try.'))
287 return gr.mirrors[gr._next]
288
289 def _failure(self, gr, cb_obj):
290 # OVERRIDE IDEAS:
291 # inspect the error - remove=1 for 404, remove=2 for connection
292 # refused, etc. (this can also be done via
293 # the callback)
294 cb = gr.kw.get('failure_callback') or self.failure_callback
295 if cb:
296 if type(cb) == type( () ):
297 cb, args, kwargs = cb
298 else:
299 args, kwargs = (), {}
300 action = cb(cb_obj, *args, **kwargs) or {}
301 else:
302 action = {}
303 # XXXX - decide - there are two ways to do this
304 # the first is action-overriding as a whole - use the entire action
305 # or fall back on module level defaults
306 #action = action or gr.kw.get('default_action') or self.default_action
307 # the other is to fall through for each element in the action dict
308 a = dict(self.default_action or {})
309 a.update(gr.kw.get('default_action', {}))
310 a.update(action)
311 action = a
312 self.increment_mirror(gr, action)
313 if action and action.get('fail', 0): raise
314
315 def increment_mirror(self, gr, action={}):
316        """Tell the mirror object to increment the mirror index
317
318 This increments the mirror index, which amounts to telling the
319 mirror object to use a different mirror (for this and future
320 downloads).
321
322 This is a SEMI-public method. It will be called internally,
323 and you may never need to call it. However, it is provided
324 (and is made public) so that the calling program can increment
325 the mirror choice for methods like urlopen. For example, with
326 urlopen, there's no good way for the mirror group to know that
327 an error occurs mid-download (it's already returned and given
328 you the file object).
329
330 remove --- can have several values
331 0 do not remove the mirror from the list
332 1 remove the mirror for this download only
333 2 remove the mirror permanently
334
335 beware of remove=0 as it can lead to infinite loops
336 """
337 badmirror = gr.mirrors[gr._next]
338
339 self._lock.acquire()
340 try:
341 ind = self.mirrors.index(badmirror)
342 except ValueError:
343 pass
344 else:
345 if action.get('remove_master', 0):
346 del self.mirrors[ind]
347 elif self._next == ind and action.get('increment_master', 1):
348 self._next += 1
349 if self._next >= len(self.mirrors): self._next = 0
350 self._lock.release()
351
352 if action.get('remove', 1):
353 del gr.mirrors[gr._next]
354 elif action.get('increment', 1):
355 gr._next += 1
356 if gr._next >= len(gr.mirrors): gr._next = 0
357
358 if DEBUG:
359 grm = [m['mirror'] for m in gr.mirrors]
360 DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next)
361 selfm = [m['mirror'] for m in self.mirrors]
362 DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
363
364 #####################################################################
365 # NON-CONFIGURATION METHODS
366 # these methods are designed to be largely workhorse methods that
367 # are not intended to be overridden. That doesn't mean you can't;
368 # if you want to, feel free, but most things can be done by
369    # overriding the configuration methods :)
370
371 def _join_url(self, base_url, rel_url):
372 if base_url.endswith('/') or rel_url.startswith('/'):
373 return base_url + rel_url
374 else:
375 return base_url + '/' + rel_url
376
377 def _mirror_try(self, func, url, kw):
378 gr = GrabRequest()
379 gr.func = func
380 gr.url = url
381 gr.kw = dict(kw)
382 self._load_gr(gr)
383
384 for k in self.options:
385 try: del kw[k]
386 except KeyError: pass
387
388 while 1:
389 mirrorchoice = self._get_mirror(gr)
390 fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
391 kwargs = dict(mirrorchoice.get('kwargs', {}))
392 kwargs.update(kw)
393 grabber = mirrorchoice.get('grabber') or self.grabber
394 func_ref = getattr(grabber, func)
395 if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
396 try:
397 return func_ref( *(fullurl,), **kwargs )
398 except URLGrabError, e:
399 if DEBUG: DEBUG.info('MIRROR: failed')
400 obj = CallbackObject()
401 obj.exception = e
402 obj.mirror = mirrorchoice['mirror']
403 obj.relative_url = gr.url
404 obj.url = fullurl
405 self._failure(gr, obj)
406
407 def urlgrab(self, url, filename=None, **kwargs):
408 kw = dict(kwargs)
409 kw['filename'] = filename
410 func = 'urlgrab'
411 return self._mirror_try(func, url, kw)
412
413 def urlopen(self, url, **kwargs):
414 kw = dict(kwargs)
415 func = 'urlopen'
416 return self._mirror_try(func, url, kw)
417
418 def urlread(self, url, limit=None, **kwargs):
419 kw = dict(kwargs)
420 kw['limit'] = limit
421 func = 'urlread'
422 return self._mirror_try(func, url, kw)
423
424
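# Illustrative sketch (editor's aside): a failure_callback that logs the
# failed mirror and asks the group to drop it permanently, using the
# action-dict keys documented in the class docstring above.
def _example_failure_callback(cb_obj):
    print 'mirror %s failed: %s' % (cb_obj.mirror, cb_obj.exception)
    return {'remove_master': 1}    # never try this mirror again
# usage (hypothetical):
#   MirrorGroup(grabber, mirrors, failure_callback=_example_failure_callback)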
425class MGRandomStart(MirrorGroup):
426 """A mirror group that starts at a random mirror in the list.
427
428    The behavior of this class is identical to MirrorGroup, except that
429 it starts at a random location in the mirror list.
430 """
431
432 def __init__(self, grabber, mirrors, **kwargs):
433 """Initialize the object
434
435        The arguments for initialization are the same as for MirrorGroup.
436 """
437 MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
438 self._next = random.randrange(len(mirrors))
439
440class MGRandomOrder(MirrorGroup):
441 """A mirror group that uses mirrors in a random order.
442
443    The behavior of this class is identical to MirrorGroup, except that
444 it uses the mirrors in a random order. Note that the order is set at
445 initialization time and fixed thereafter. That is, it does not pick a
446 random mirror after each failure.
447 """
448
449 def __init__(self, grabber, mirrors, **kwargs):
450 """Initialize the object
451
452        The arguments for initialization are the same as for MirrorGroup.
453 """
454 MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
455 random.shuffle(self.mirrors)
456
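# Illustrative sketch (editor's aside): both randomized groups take the same
# arguments as MirrorGroup.
def _example_random_groups(grabber, mirrors):
    start = MGRandomStart(grabber, mirrors)   # random starting mirror
    order = MGRandomOrder(grabber, mirrors)   # whole list shuffled once
    return start, order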
457if __name__ == '__main__':
458 pass
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py
deleted file mode 100644
index 02db524e76..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py
+++ /dev/null
@@ -1,530 +0,0 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $
21
22import sys
23import time
24import math
25import thread
26
27class BaseMeter:
28 def __init__(self):
29 self.update_period = 0.3 # seconds
30
31 self.filename = None
32 self.url = None
33 self.basename = None
34 self.text = None
35 self.size = None
36 self.start_time = None
37 self.last_amount_read = 0
38 self.last_update_time = None
39 self.re = RateEstimator()
40
41 def start(self, filename=None, url=None, basename=None,
42 size=None, now=None, text=None):
43 self.filename = filename
44 self.url = url
45 self.basename = basename
46 self.text = text
47
48 #size = None ######### TESTING
49 self.size = size
50 if not size is None: self.fsize = format_number(size) + 'B'
51
52 if now is None: now = time.time()
53 self.start_time = now
54 self.re.start(size, now)
55 self.last_amount_read = 0
56 self.last_update_time = now
57 self._do_start(now)
58
59 def _do_start(self, now=None):
60 pass
61
62 def update(self, amount_read, now=None):
63 # for a real gui, you probably want to override and put a call
64 # to your mainloop iteration function here
65 if now is None: now = time.time()
66 if (now >= self.last_update_time + self.update_period) or \
67 not self.last_update_time:
68 self.re.update(amount_read, now)
69 self.last_amount_read = amount_read
70 self.last_update_time = now
71 self._do_update(amount_read, now)
72
73 def _do_update(self, amount_read, now=None):
74 pass
75
76 def end(self, amount_read, now=None):
77 if now is None: now = time.time()
78 self.re.update(amount_read, now)
79 self.last_amount_read = amount_read
80 self.last_update_time = now
81 self._do_end(amount_read, now)
82
83 def _do_end(self, amount_read, now=None):
84 pass
85
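# Illustrative sketch (editor's aside): the minimal BaseMeter subclass. Only
# the _do_* hooks need overriding; start()/update()/end() already handle the
# RateEstimator and the update_period rate limit.
class _ExampleMeter(BaseMeter):
    def _do_update(self, amount_read, now=None):
        print '%s: %s bytes so far' % (self.basename, amount_read)
    def _do_end(self, amount_read, now=None):
        print '%s: done (%s bytes)' % (self.basename, amount_read)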
86class TextMeter(BaseMeter):
87 def __init__(self, fo=sys.stderr):
88 BaseMeter.__init__(self)
89 self.fo = fo
90
91 def _do_update(self, amount_read, now=None):
92 etime = self.re.elapsed_time()
93 fetime = format_time(etime)
94 fread = format_number(amount_read)
95 #self.size = None
96 if self.text is not None:
97 text = self.text
98 else:
99 text = self.basename
100 if self.size is None:
101 out = '\r%-60.60s %5sB %s ' % \
102 (text, fread, fetime)
103 else:
104 rtime = self.re.remaining_time()
105 frtime = format_time(rtime)
106 frac = self.re.fraction_read()
107 bar = '='*int(25 * frac)
108
109 out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
110 (text, frac*100, bar, fread, frtime)
111
112 self.fo.write(out)
113 self.fo.flush()
114
115 def _do_end(self, amount_read, now=None):
116 total_time = format_time(self.re.elapsed_time())
117 total_size = format_number(amount_read)
118 if self.text is not None:
119 text = self.text
120 else:
121 text = self.basename
122 if self.size is None:
123 out = '\r%-60.60s %5sB %s ' % \
124 (text, total_size, total_time)
125 else:
126 bar = '='*25
127 out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \
128 (text, 100, bar, total_size, total_time)
129 self.fo.write(out + '\n')
130 self.fo.flush()
131
132text_progress_meter = TextMeter
133
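# Illustrative sketch (editor's aside): handing a TextMeter to a grabber as
# its progress_obj, assuming the sibling grabber module imports bare as it
# does elsewhere in this tree; the URL is hypothetical.
def _example_text_meter():
    from grabber import URLGrabber
    g = URLGrabber(progress_obj=TextMeter())
    return g.urlgrab('http://example.com/a.html')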
134class MultiFileHelper(BaseMeter):
135 def __init__(self, master):
136 BaseMeter.__init__(self)
137 self.master = master
138
139 def _do_start(self, now):
140 self.master.start_meter(self, now)
141
142 def _do_update(self, amount_read, now):
143 # elapsed time since last update
144 self.master.update_meter(self, now)
145
146 def _do_end(self, amount_read, now):
147 self.ftotal_time = format_time(now - self.start_time)
148 self.ftotal_size = format_number(self.last_amount_read)
149 self.master.end_meter(self, now)
150
151 def failure(self, message, now=None):
152 self.master.failure_meter(self, message, now)
153
154 def message(self, message):
155 self.master.message_meter(self, message)
156
157class MultiFileMeter:
158 helperclass = MultiFileHelper
159 def __init__(self):
160 self.meters = []
161 self.in_progress_meters = []
162 self._lock = thread.allocate_lock()
163 self.update_period = 0.3 # seconds
164
165 self.numfiles = None
166 self.finished_files = 0
167 self.failed_files = 0
168 self.open_files = 0
169 self.total_size = None
170 self.failed_size = 0
171 self.start_time = None
172 self.finished_file_size = 0
173 self.last_update_time = None
174 self.re = RateEstimator()
175
176 def start(self, numfiles=None, total_size=None, now=None):
177 if now is None: now = time.time()
178 self.numfiles = numfiles
179 self.finished_files = 0
180 self.failed_files = 0
181 self.open_files = 0
182 self.total_size = total_size
183 self.failed_size = 0
184 self.start_time = now
185 self.finished_file_size = 0
186 self.last_update_time = now
187 self.re.start(total_size, now)
188 self._do_start(now)
189
190 def _do_start(self, now):
191 pass
192
193 def end(self, now=None):
194 if now is None: now = time.time()
195 self._do_end(now)
196
197 def _do_end(self, now):
198 pass
199
200 def lock(self): self._lock.acquire()
201 def unlock(self): self._lock.release()
202
203 ###########################################################
204 # child meter creation and destruction
205 def newMeter(self):
206 newmeter = self.helperclass(self)
207 self.meters.append(newmeter)
208 return newmeter
209
210 def removeMeter(self, meter):
211 self.meters.remove(meter)
212
213 ###########################################################
214 # child functions - these should only be called by helpers
215 def start_meter(self, meter, now):
216 if not meter in self.meters:
217 raise ValueError('attempt to use orphaned meter')
218 self._lock.acquire()
219 try:
220 if not meter in self.in_progress_meters:
221 self.in_progress_meters.append(meter)
222 self.open_files += 1
223 finally:
224 self._lock.release()
225 self._do_start_meter(meter, now)
226
227 def _do_start_meter(self, meter, now):
228 pass
229
230 def update_meter(self, meter, now):
231 if not meter in self.meters:
232 raise ValueError('attempt to use orphaned meter')
233 if (now >= self.last_update_time + self.update_period) or \
234 not self.last_update_time:
235 self.re.update(self._amount_read(), now)
236 self.last_update_time = now
237 self._do_update_meter(meter, now)
238
239 def _do_update_meter(self, meter, now):
240 pass
241
242 def end_meter(self, meter, now):
243 if not meter in self.meters:
244 raise ValueError('attempt to use orphaned meter')
245 self._lock.acquire()
246 try:
247 try: self.in_progress_meters.remove(meter)
248 except ValueError: pass
249 self.open_files -= 1
250 self.finished_files += 1
251 self.finished_file_size += meter.last_amount_read
252 finally:
253 self._lock.release()
254 self._do_end_meter(meter, now)
255
256 def _do_end_meter(self, meter, now):
257 pass
258
259 def failure_meter(self, meter, message, now):
260 if not meter in self.meters:
261 raise ValueError('attempt to use orphaned meter')
262 self._lock.acquire()
263 try:
264 try: self.in_progress_meters.remove(meter)
265 except ValueError: pass
266 self.open_files -= 1
267 self.failed_files += 1
268 if meter.size and self.failed_size is not None:
269 self.failed_size += meter.size
270 else:
271 self.failed_size = None
272 finally:
273 self._lock.release()
274 self._do_failure_meter(meter, message, now)
275
276 def _do_failure_meter(self, meter, message, now):
277 pass
278
279 def message_meter(self, meter, message):
280 pass
281
282 ########################################################
283 # internal functions
284 def _amount_read(self):
285 tot = self.finished_file_size
286 for m in self.in_progress_meters:
287 tot += m.last_amount_read
288 return tot
289
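The master never tracks a byte count of its own: _amount_read() recomputes the aggregate from the finished-file tally plus whatever each in-progress helper last reported. With illustrative numbers: two finished files totalling 300000 bytes and two live helpers at 50000 and 20000 bytes give 300000 + 50000 + 20000 = 370000 bytes read overall.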
290
291class TextMultiFileMeter(MultiFileMeter):
292 def __init__(self, fo=sys.stderr):
293 self.fo = fo
294 MultiFileMeter.__init__(self)
295
296 # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:##
297 def _do_update_meter(self, meter, now):
298 self._lock.acquire()
299 try:
300 format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \
301 "time: %8.8s/%8.8s"
302 df = self.finished_files
303 tf = self.numfiles or 1
304 pf = 100 * float(df)/tf + 0.49
305 dd = self.re.last_amount_read
306 td = self.total_size
307 pd = 100 * (self.re.fraction_read() or 0) + 0.49
308 dt = self.re.elapsed_time()
309 rt = self.re.remaining_time()
310 if rt is None: tt = None
311 else: tt = dt + rt
312
313 fdd = format_number(dd) + 'B'
314 ftd = format_number(td) + 'B'
315 fdt = format_time(dt, 1)
316 ftt = format_time(tt, 1)
317
318 out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
319 self.fo.write('\r' + out)
320 self.fo.flush()
321 finally:
322 self._lock.release()
323
324 def _do_end_meter(self, meter, now):
325 self._lock.acquire()
326 try:
327 format = "%-30.30s %6.6s %8.8s %9.9s"
328 fn = meter.basename
329 size = meter.last_amount_read
330 fsize = format_number(size) + 'B'
331 et = meter.re.elapsed_time()
332 fet = format_time(et, 1)
333 frate = format_number(size / et) + 'B/s'
334
335 out = '%-79.79s' % (format % (fn, fsize, fet, frate))
336 self.fo.write('\r' + out + '\n')
337 finally:
338 self._lock.release()
339 self._do_update_meter(meter, now)
340
341 def _do_failure_meter(self, meter, message, now):
342 self._lock.acquire()
343 try:
344 format = "%-30.30s %6.6s %s"
345 fn = meter.basename
346 if type(message) in (type(''), type(u'')):
347 message = message.splitlines()
348 if not message: message = ['']
349 out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
350 self.fo.write('\r' + out + '\n')
351 for m in message[1:]: self.fo.write(' ' + m + '\n')
352 finally:
353 self._lock.release()
354 self._do_update_meter(meter, now)
355
356 def message_meter(self, meter, message):
357 self._lock.acquire()
358 try:
359 pass
360 finally:
361 self._lock.release()
362
363 def _do_end(self, now):
364 self._do_update_meter(None, now)
365 self._lock.acquire()
366 try:
367 self.fo.write('\n')
368 self.fo.flush()
369 finally:
370 self._lock.release()
371
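A hedged end-to-end sketch of the master/helper protocol (file names and sizes invented; callers obtain helpers from newMeter() and drive each one like any BaseMeter):

    import time

    master = TextMultiFileMeter()
    master.start(numfiles=2, total_size=300000)

    m1 = master.newMeter()
    m1.start(basename='a.tar', size=100000)
    time.sleep(0.5)
    m1.update(100000)
    m1.end(100000)                  # first file completes

    m2 = master.newMeter()
    m2.start(basename='b.tar', size=200000)
    time.sleep(0.5)
    m2.update(50000)
    m2.failure('connection reset')  # second file fails part-way

    master.end()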
372######################################################################
373# support classes and functions
374
375class RateEstimator:
376 def __init__(self, timescale=5.0):
377 self.timescale = timescale
378
379 def start(self, total=None, now=None):
380 if now is None: now = time.time()
381 self.total = total
382 self.start_time = now
383 self.last_update_time = now
384 self.last_amount_read = 0
385 self.ave_rate = None
386
387 def update(self, amount_read, now=None):
388 if now is None: now = time.time()
389 if amount_read == 0:
390 # if we just started this file, all bets are off
391 self.last_update_time = now
392 self.last_amount_read = 0
393 self.ave_rate = None
394 return
395
396 #print 'times', now, self.last_update_time
397 time_diff = now - self.last_update_time
398 read_diff = amount_read - self.last_amount_read
399 self.last_update_time = now
400 self.last_amount_read = amount_read
401 self.ave_rate = self._temporal_rolling_ave(\
402 time_diff, read_diff, self.ave_rate, self.timescale)
403 #print 'results', time_diff, read_diff, self.ave_rate
404
405 #####################################################################
406 # result methods
407 def average_rate(self):
408 "get the average transfer rate (in bytes/second)"
409 return self.ave_rate
410
411 def elapsed_time(self):
412 "the time between the start of the transfer and the most recent update"
413 return self.last_update_time - self.start_time
414
415 def remaining_time(self):
416 "estimated time remaining"
417 if not self.ave_rate or not self.total: return None
418 return (self.total - self.last_amount_read) / self.ave_rate
419
420 def fraction_read(self):
421 """the fraction of the data that has been read
422 (can be None for unknown transfer size)"""
423 if self.total is None: return None
424 elif self.total == 0: return 1.0
425 else: return float(self.last_amount_read)/self.total
426
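A deterministic example of the result methods (explicit now values instead of time.time(), so the numbers are reproducible):

    est = RateEstimator(timescale=5.0)
    est.start(total=1000000, now=0.0)
    est.update(500000, now=2.0)
    est.average_rate()      # 250000.0 B/s: the first sample becomes the average
    est.remaining_time()    # 2.0 s for the remaining 500000 bytes
    est.fraction_read()     # 0.5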
427 #########################################################################
428 # support methods
429 def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
430 """a temporal rolling average performs smooth averaging even when
431 updates come at irregular intervals. This is performed by scaling
432 the "epsilon" according to the time since the last update.
433 Specifically, epsilon = time_diff / timescale
434
435 As a general rule, the average will take on a completely new value
436 after 'timescale' seconds."""
437 epsilon = time_diff / timescale
438 if epsilon > 1: epsilon = 1.0
439 return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)
440
441 def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
442 """perform a "rolling average" iteration
443 a rolling average "folds" new data into an existing average with
444 some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive)
445 a value of 0.0 means only the old value (initial value) counts,
446 and a value of 1.0 means only the newest value is considered."""
447
448 try:
449 recent_rate = read_diff / time_diff
450 except ZeroDivisionError:
451 recent_rate = None
452 if last_ave is None: return recent_rate
453 elif recent_rate is None: return last_ave
454
455 # at this point, both last_ave and recent_rate are numbers
456 return epsilon * recent_rate + (1 - epsilon) * last_ave
457
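Concretely, with timescale=5.0 an update arriving one second after the previous one gets epsilon = 1/5, so the new sample carries 20% of the weight:

    est = RateEstimator(timescale=5.0)
    est.start(now=0.0)
    est.update(100000, now=1.0)   # first sample: average = 100000.0 B/s
    est.update(300000, now=2.0)   # recent rate 200000 B/s, epsilon = 0.2
    est.average_rate()            # 0.2*200000 + 0.8*100000 = 120000.0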
458 def _round_remaining_time(self, rt, start_time=15.0):
459 """round the remaining time, depending on its size
460 If rt is between n*start_time and (n+1)*start_time round downward
461 to the nearest multiple of n (for any counting number n).
462 If rt < start_time, round down to the nearest 1.
463 For example (for start_time = 15.0):
464 2.7 -> 2.0
465 25.2 -> 25.0
466 26.4 -> 26.0
467 35.3 -> 34.0
468 63.6 -> 60.0
469 """
470
471 if rt < 0: return 0.0
472 shift = int(math.log(rt/start_time)/math.log(2))
473 rt = int(rt)
474 if shift <= 0: return rt
475 return float(int(rt) >> shift << shift)
476
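The shifting realizes the docstring's rule with powers of two: shift = floor(log2(rt/start_time)), and the low bits of int(rt) are masked off, rounding down to a multiple of 2**shift. Tracing the 63.6 example: 63.6/15.0 = 4.24, log2(4.24) is about 2.08, so shift = 2 and int(63.6) >> 2 << 2 = 60, giving 60.0.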
477
478def format_time(seconds, use_hours=0):
479 if seconds is None or seconds < 0:
480 if use_hours: return '--:--:--'
481 else: return '--:--'
482 else:
483 seconds = int(seconds)
484 minutes = seconds / 60
485 seconds = seconds % 60
486 if use_hours:
487 hours = minutes / 60
488 minutes = minutes % 60
489 return '%02i:%02i:%02i' % (hours, minutes, seconds)
490 else:
491 return '%02i:%02i' % (minutes, seconds)
492
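For example (note the Python 2 integer division on the minutes/hours math):

    format_time(None)               # '--:--'
    format_time(125)                # '02:05'
    format_time(4000, use_hours=1)  # '01:06:40'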
493def format_number(number, SI=0, space=' '):
494 """Turn numbers into human-readable metric-like numbers"""
495 symbols = ['', # (none)
496 'k', # kilo
497 'M', # mega
498 'G', # giga
499 'T', # tera
500 'P', # peta
501 'E', # exa
502 'Z', # zetta
503 'Y'] # yotta
504
505 if SI: step = 1000.0
506 else: step = 1024.0
507
508 thresh = 999
509 depth = 0
510 max_depth = len(symbols) - 1
511
512 # we want numbers between 0 and thresh, but don't exceed the length
513 # of our list. In that event, the formatting will be screwed up,
514 # but it'll still show the right number.
515 while number > thresh and depth < max_depth:
516 depth = depth + 1
517 number = number / step
518
519 if type(number) == type(1) or type(number) == type(1L):
520 # it's an int or a long, which means it didn't get divided,
521 # which means it's already short enough
522 format = '%i%s%s'
523 elif number < 9.95:
524 # must use 9.95 for proper sizing. For example, 9.99 will be
525 # rounded to 10.0 with the .1f format string (which is too long)
526 format = '%.1f%s%s'
527 else:
528 format = '%.0f%s%s'
529
530 return(format % (float(number or 0), space, symbols[depth]))
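For example:

    format_number(512)          # '512 '  (numbers below the threshold pass through)
    format_number(9000)         # '8.8 k' (1024-based by default)
    format_number(9000, SI=1)   # '9.0 k' (1000-based when SI is set)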
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py
deleted file mode 100644
index 07848dac7c..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py
+++ /dev/null
@@ -1,90 +0,0 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18
19import httplib
20import urllib2
21
22try:
23 from M2Crypto import SSL
24 from M2Crypto import httpslib
25 from M2Crypto import m2urllib2
26
27 SSL.Connection.clientPostConnectionCheck = None
28 have_m2crypto = True
29except ImportError:
30 have_m2crypto = False
31
32DEBUG = None
33
34if have_m2crypto:
35
36 class M2SSLFactory:
37
38 def __init__(self, ssl_ca_cert, ssl_context):
39 self.ssl_context = self._get_ssl_context(ssl_ca_cert, ssl_context)
40
41 def _get_ssl_context(self, ssl_ca_cert, ssl_context):
42 """
43 Create an ssl context using the CA cert file or ssl context.
44
45 The CA cert is used first if it was passed as an option. If not,
46 then the supplied ssl context is used. If no ssl context was supplied,
47 None is returned.
48 """
49 if ssl_ca_cert:
50 context = SSL.Context()
51 context.load_verify_locations(ssl_ca_cert)
52 context.set_verify(SSL.verify_none, -1)
53 return context
54 else:
55 return ssl_context
56
57 def create_https_connection(self, host, response_class = None):
58 connection = httpslib.HTTPSConnection(host, ssl_context = self.ssl_context)
59 if response_class:
60 connection.response_class = response_class
61 return connection
62
63 def create_opener(self, *handlers):
64 return m2urllib2.build_opener(self.ssl_context, *handlers)
65
66
67class SSLFactory:
68
69 def create_https_connection(self, host, response_class = None):
70 connection = httplib.HTTPSConnection(host)
71 if response_class:
72 connection.response_class = response_class
73 return connection
74
75 def create_opener(self, *handlers):
76 return urllib2.build_opener(*handlers)
77
78
79
80def get_factory(ssl_ca_cert = None, ssl_context = None):
81 """ Return an SSLFactory, based on if M2Crypto is available. """
82 if have_m2crypto:
83 return M2SSLFactory(ssl_ca_cert, ssl_context)
84 else:
85 # Log here if someone provides the args but we don't use them.
86 if ssl_ca_cert or ssl_context:
87 if DEBUG:
88 DEBUG.warning("SSL arguments supplied, but M2Crypto is not available. "
89 "Using Python SSL.")
90 return SSLFactory()
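Either factory exposes the same two-method interface, so callers stay agnostic about whether M2Crypto is present. A hedged usage sketch (the CA bundle path is illustrative):

    factory = get_factory(ssl_ca_cert='/etc/pki/tls/certs/ca-bundle.crt')
    conn = factory.create_https_connection('example.com')
    opener = factory.create_opener()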