Diffstat (limited to 'scripts/lib')
7 files changed, 0 insertions(+), 3688 deletions(-)
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
deleted file mode 100644
index 7bcd9d5541..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/__init__.py
+++ /dev/null
@@ -1,53 +0,0 @@
1 | # This program is free software; you can redistribute it and/or modify | ||
2 | # it under the terms of the GNU General Public License as published by | ||
3 | # the Free Software Foundation; either version 2 of the License, or | ||
4 | # (at your option) any later version. | ||
5 | # | ||
6 | # This program is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
9 | # GNU Library General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU General Public License | ||
12 | # along with this program; if not, write to the Free Software | ||
13 | # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
14 | |||
15 | # Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko | ||
16 | |||
17 | # $Id: __init__.py,v 1.20 2006/09/22 00:58:55 mstenner Exp $ | ||
18 | |||
19 | """A high-level cross-protocol url-grabber. | ||
20 | |||
21 | Using urlgrabber, data can be fetched in three basic ways: | ||
22 | |||
23 | urlgrab(url) copy the file to the local filesystem | ||
24 | urlopen(url) open the remote file and return a file object | ||
25 | (like urllib2.urlopen) | ||
26 | urlread(url) return the contents of the file as a string | ||
27 | |||
28 | When using these functions (or methods), urlgrabber supports the | ||
29 | following features: | ||
30 | |||
31 | * identical behavior for http://, ftp://, and file:// urls | ||
32 | * http keepalive - faster downloads of many files by using | ||
33 | only a single connection | ||
34 | * byte ranges - fetch only a portion of the file | ||
35 | * reget - for a urlgrab, resume a partial download | ||
36 | * progress meters - the ability to report download progress | ||
37 | automatically, even when using urlopen! | ||
38 | * throttling - restrict bandwidth usage | ||
39 | * retries - automatically retry a download if it fails. The | ||
40 | number of retries and failure types are configurable. | ||
41 | * authenticated server access for http and ftp | ||
42 | * proxy support - support for authenticated http and ftp proxies | ||
43 | * mirror groups - treat a list of mirrors as a single source, | ||
44 | automatically switching mirrors if there is a failure. | ||
45 | """ | ||
46 | |||
47 | __version__ = '3.1.0' | ||
48 | __date__ = '2006/09/21' | ||
49 | __author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \ | ||
50 | 'Ryan Tomayko <rtomayko@naeblis.cx>' | ||
51 | __url__ = 'http://linux.duke.edu/projects/urlgrabber/' | ||
52 | |||
53 | from grabber import urlgrab, urlopen, urlread | ||
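The module docstring above names the three top-level entry points. A minimal usage sketch based only on that documented API (the URL and filenames are placeholders, not from this repository):

    from urlgrabber import urlgrab, urlopen, urlread

    # copy a remote file to the local filesystem
    local_path = urlgrab('http://example.com/some/file.txt')

    # open it as a file-like object instead (like urllib2.urlopen)
    fo = urlopen('http://example.com/some/file.txt')
    data = fo.read()
    fo.close()

    # or slurp the whole thing into a string
    text = urlread('http://example.com/some/file.txt')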
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
deleted file mode 100644
index 001b4e32d6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
+++ /dev/null
@@ -1,463 +0,0 @@
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
19 | |||
20 | # $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $ | ||
21 | |||
22 | import os | ||
23 | import stat | ||
24 | import urllib | ||
25 | import urllib2 | ||
26 | import rfc822 | ||
27 | |||
28 | DEBUG = None | ||
29 | |||
30 | try: | ||
31 | from cStringIO import StringIO | ||
32 | except ImportError, msg: | ||
33 | from StringIO import StringIO | ||
34 | |||
35 | class RangeError(IOError): | ||
36 | """Error raised when an unsatisfiable range is requested.""" | ||
37 | pass | ||
38 | |||
39 | class HTTPRangeHandler(urllib2.BaseHandler): | ||
40 | """Handler that enables HTTP Range headers. | ||
41 | |||
42 | This was extremely simple. The Range header is an HTTP feature to | ||
43 | begin with so all this class does is tell urllib2 that the | ||
44 | "206 Partial Content" reponse from the HTTP server is what we | ||
45 | expected. | ||
46 | |||
47 | Example: | ||
48 | import urllib2 | ||
49 | import byterange | ||
50 | |||
51 | range_handler = byterange.HTTPRangeHandler() | ||
52 | opener = urllib2.build_opener(range_handler) | ||
53 | |||
54 | # install it | ||
55 | urllib2.install_opener(opener) | ||
56 | |||
57 | # create Request and set Range header | ||
58 | req = urllib2.Request('http://www.python.org/') | ||
59 | req.add_header('Range', 'bytes=30-50') | ||
60 | f = urllib2.urlopen(req) | ||
61 | """ | ||
62 | |||
63 | def http_error_206(self, req, fp, code, msg, hdrs): | ||
64 | # 206 Partial Content Response | ||
65 | r = urllib.addinfourl(fp, hdrs, req.get_full_url()) | ||
66 | r.code = code | ||
67 | r.msg = msg | ||
68 | return r | ||
69 | |||
70 | def http_error_416(self, req, fp, code, msg, hdrs): | ||
71 | # HTTP's Range Not Satisfiable error | ||
72 | raise RangeError('Requested Range Not Satisfiable') | ||
73 | |||
74 | class HTTPSRangeHandler(HTTPRangeHandler): | ||
75 | """ Range Header support for HTTPS. """ | ||
76 | |||
77 | def https_error_206(self, req, fp, code, msg, hdrs): | ||
78 | return self.http_error_206(req, fp, code, msg, hdrs) | ||
79 | |||
80 | def https_error_416(self, req, fp, code, msg, hdrs): | ||
81 | return self.http_error_416(req, fp, code, msg, hdrs) | ||
82 | |||
83 | class RangeableFileObject: | ||
84 | """File object wrapper to enable raw range handling. | ||
85 | This was implemented primarily for handling range | ||
86 | specifications for file:// urls. This object effectively makes | ||
87 | a file object look like it consists only of a range of bytes in | ||
88 | the stream. | ||
89 | |||
90 | Examples: | ||
91 | # expose 10 bytes, starting at byte position 20, from | ||
92 | # /etc/passwd. | ||
93 | >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30)) | ||
94 | # seek seeks within the range (to position 23 in this case) | ||
95 | >>> fo.seek(3) | ||
96 | # tell tells where you are _within the range_ (position 3 in | ||
97 | # this case) | ||
98 | >>> fo.tell() | ||
99 | # read EOFs if an attempt is made to read past the last | ||
100 | # byte in the range. The following will return only 7 bytes. | ||
101 | >>> fo.read(30) | ||
102 | """ | ||
103 | |||
104 | def __init__(self, fo, rangetup): | ||
105 | """Create a RangeableFileObject. | ||
106 | fo -- a file like object. only the read() method need be | ||
107 | supported but supporting an optimized seek() is | ||
108 | preferable. | ||
109 | rangetup -- a (firstbyte,lastbyte) tuple specifying the range | ||
110 | to work over. | ||
111 | The file object provided is assumed to be at byte offset 0. | ||
112 | """ | ||
113 | self.fo = fo | ||
114 | (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup) | ||
115 | self.realpos = 0 | ||
116 | self._do_seek(self.firstbyte) | ||
117 | |||
118 | def __getattr__(self, name): | ||
119 | """This effectively allows us to wrap at the instance level. | ||
120 | Any attribute not found in _this_ object will be searched for | ||
121 | in self.fo. This includes methods.""" | ||
122 | if hasattr(self.fo, name): | ||
123 | return getattr(self.fo, name) | ||
124 | raise AttributeError, name | ||
125 | |||
126 | def tell(self): | ||
127 | """Return the position within the range. | ||
128 | This is different from fo.seek in that position 0 is the | ||
129 | first byte position of the range tuple. For example, if | ||
130 | this object was created with a range tuple of (500,899), | ||
131 | tell() will return 0 when at byte position 500 of the file. | ||
132 | """ | ||
133 | return (self.realpos - self.firstbyte) | ||
134 | |||
135 | def seek(self,offset,whence=0): | ||
136 | """Seek within the byte range. | ||
137 | Positioning is identical to that described under tell(). | ||
138 | """ | ||
139 | assert whence in (0, 1, 2) | ||
140 | if whence == 0: # absolute seek | ||
141 | realoffset = self.firstbyte + offset | ||
142 | elif whence == 1: # relative seek | ||
143 | realoffset = self.realpos + offset | ||
144 | elif whence == 2: # absolute from end of file | ||
145 | # XXX: are we raising the right Error here? | ||
146 | raise IOError('seek from end of file not supported.') | ||
147 | |||
148 | # do not allow seek past lastbyte in range | ||
149 | if self.lastbyte and (realoffset >= self.lastbyte): | ||
150 | realoffset = self.lastbyte | ||
151 | |||
152 | self._do_seek(realoffset - self.realpos) | ||
153 | |||
154 | def read(self, size=-1): | ||
155 | """Read within the range. | ||
156 | This method will limit the size read based on the range. | ||
157 | """ | ||
158 | size = self._calc_read_size(size) | ||
159 | rslt = self.fo.read(size) | ||
160 | self.realpos += len(rslt) | ||
161 | return rslt | ||
162 | |||
163 | def readline(self, size=-1): | ||
164 | """Read lines within the range. | ||
165 | This method will limit the size read based on the range. | ||
166 | """ | ||
167 | size = self._calc_read_size(size) | ||
168 | rslt = self.fo.readline(size) | ||
169 | self.realpos += len(rslt) | ||
170 | return rslt | ||
171 | |||
172 | def _calc_read_size(self, size): | ||
173 | """Handles calculating the amount of data to read based on | ||
174 | the range. | ||
175 | """ | ||
176 | if self.lastbyte: | ||
177 | if size > -1: | ||
178 | if ((self.realpos + size) >= self.lastbyte): | ||
179 | size = (self.lastbyte - self.realpos) | ||
180 | else: | ||
181 | size = (self.lastbyte - self.realpos) | ||
182 | return size | ||
183 | |||
184 | def _do_seek(self,offset): | ||
185 | """Seek based on whether wrapped object supports seek(). | ||
186 | offset is relative to the current position (self.realpos). | ||
187 | """ | ||
188 | assert offset >= 0 | ||
189 | if not hasattr(self.fo, 'seek'): | ||
190 | self._poor_mans_seek(offset) | ||
191 | else: | ||
192 | self.fo.seek(self.realpos + offset) | ||
193 | self.realpos+= offset | ||
194 | |||
195 | def _poor_mans_seek(self,offset): | ||
196 | """Seek by calling the wrapped file objects read() method. | ||
197 | This is used for file like objects that do not have native | ||
198 | seek support. The wrapped objects read() method is called | ||
199 | to manually seek to the desired position. | ||
200 | offset -- read this number of bytes from the wrapped | ||
201 | file object. | ||
202 | raise RangeError if we encounter EOF before reaching the | ||
203 | specified offset. | ||
204 | """ | ||
205 | pos = 0 | ||
206 | bufsize = 1024 | ||
207 | while pos < offset: | ||
208 | if (pos + bufsize) > offset: | ||
209 | bufsize = offset - pos | ||
210 | buf = self.fo.read(bufsize) | ||
211 | if len(buf) != bufsize: | ||
212 | raise RangeError('Requested Range Not Satisfiable') | ||
213 | pos+= bufsize | ||
214 | |||
215 | class FileRangeHandler(urllib2.FileHandler): | ||
216 | """FileHandler subclass that adds Range support. | ||
217 | This class handles Range headers exactly like an HTTP | ||
218 | server would. | ||
219 | """ | ||
220 | def open_local_file(self, req): | ||
221 | import mimetypes | ||
222 | import mimetools | ||
223 | host = req.get_host() | ||
224 | file = req.get_selector() | ||
225 | localfile = urllib.url2pathname(file) | ||
226 | stats = os.stat(localfile) | ||
227 | size = stats[stat.ST_SIZE] | ||
228 | modified = rfc822.formatdate(stats[stat.ST_MTIME]) | ||
229 | mtype = mimetypes.guess_type(file)[0] | ||
230 | if host: | ||
231 | host, port = urllib.splitport(host) | ||
232 | if port or socket.gethostbyname(host) not in self.get_names(): | ||
233 | raise urllib2.URLError('file not on local host') | ||
234 | fo = open(localfile,'rb') | ||
235 | brange = req.headers.get('Range',None) | ||
236 | brange = range_header_to_tuple(brange) | ||
237 | assert brange != () | ||
238 | if brange: | ||
239 | (fb,lb) = brange | ||
240 | if lb == '': lb = size | ||
241 | if fb < 0 or fb > size or lb > size: | ||
242 | raise RangeError('Requested Range Not Satisfiable') | ||
243 | size = (lb - fb) | ||
244 | fo = RangeableFileObject(fo, (fb,lb)) | ||
245 | headers = mimetools.Message(StringIO( | ||
246 | 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % | ||
247 | (mtype or 'text/plain', size, modified))) | ||
248 | return urllib.addinfourl(fo, headers, 'file:'+file) | ||
249 | |||
250 | |||
251 | # FTP Range Support | ||
252 | # Unfortunately, a large amount of base FTP code had to be copied | ||
253 | # from urllib and urllib2 in order to insert the FTP REST command. | ||
254 | # Code modifications for range support have been commented as | ||
255 | # follows: | ||
256 | # -- range support modifications start/end here | ||
257 | |||
258 | from urllib import splitport, splituser, splitpasswd, splitattr, \ | ||
259 | unquote, addclosehook, addinfourl | ||
260 | import ftplib | ||
261 | import socket | ||
262 | import sys | ||
264 | import mimetypes | ||
265 | import mimetools | ||
266 | |||
267 | class FTPRangeHandler(urllib2.FTPHandler): | ||
268 | def ftp_open(self, req): | ||
269 | host = req.get_host() | ||
270 | if not host: | ||
271 | raise IOError, ('ftp error', 'no host given') | ||
272 | host, port = splitport(host) | ||
273 | if port is None: | ||
274 | port = ftplib.FTP_PORT | ||
275 | |||
276 | # username/password handling | ||
277 | user, host = splituser(host) | ||
278 | if user: | ||
279 | user, passwd = splitpasswd(user) | ||
280 | else: | ||
281 | passwd = None | ||
282 | host = unquote(host) | ||
283 | user = unquote(user or '') | ||
284 | passwd = unquote(passwd or '') | ||
285 | |||
286 | try: | ||
287 | host = socket.gethostbyname(host) | ||
288 | except socket.error, msg: | ||
289 | raise urllib2.URLError(msg) | ||
290 | path, attrs = splitattr(req.get_selector()) | ||
291 | dirs = path.split('/') | ||
292 | dirs = map(unquote, dirs) | ||
293 | dirs, file = dirs[:-1], dirs[-1] | ||
294 | if dirs and not dirs[0]: | ||
295 | dirs = dirs[1:] | ||
296 | try: | ||
297 | fw = self.connect_ftp(user, passwd, host, port, dirs) | ||
298 | type = file and 'I' or 'D' | ||
299 | for attr in attrs: | ||
300 | attr, value = splitattr(attr) | ||
301 | if attr.lower() == 'type' and \ | ||
302 | value in ('a', 'A', 'i', 'I', 'd', 'D'): | ||
303 | type = value.upper() | ||
304 | |||
305 | # -- range support modifications start here | ||
306 | rest = None | ||
307 | range_tup = range_header_to_tuple(req.headers.get('Range',None)) | ||
308 | assert range_tup != () | ||
309 | if range_tup: | ||
310 | (fb,lb) = range_tup | ||
311 | if fb > 0: rest = fb | ||
312 | # -- range support modifications end here | ||
313 | |||
314 | fp, retrlen = fw.retrfile(file, type, rest) | ||
315 | |||
316 | # -- range support modifications start here | ||
317 | if range_tup: | ||
318 | (fb,lb) = range_tup | ||
319 | if lb == '': | ||
320 | if retrlen is None or retrlen == 0: | ||
321 | raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') | ||
322 | lb = retrlen | ||
323 | retrlen = lb - fb | ||
324 | if retrlen < 0: | ||
325 | # beginning of range is larger than file | ||
326 | raise RangeError('Requested Range Not Satisfiable') | ||
327 | else: | ||
328 | retrlen = lb - fb | ||
329 | fp = RangeableFileObject(fp, (0,retrlen)) | ||
330 | # -- range support modifications end here | ||
331 | |||
332 | headers = "" | ||
333 | mtype = mimetypes.guess_type(req.get_full_url())[0] | ||
334 | if mtype: | ||
335 | headers += "Content-Type: %s\n" % mtype | ||
336 | if retrlen is not None and retrlen >= 0: | ||
337 | headers += "Content-Length: %d\n" % retrlen | ||
338 | sf = StringIO(headers) | ||
339 | headers = mimetools.Message(sf) | ||
340 | return addinfourl(fp, headers, req.get_full_url()) | ||
341 | except ftplib.all_errors, msg: | ||
342 | raise IOError, ('ftp error', msg), sys.exc_info()[2] | ||
343 | |||
344 | def connect_ftp(self, user, passwd, host, port, dirs): | ||
345 | fw = ftpwrapper(user, passwd, host, port, dirs) | ||
346 | return fw | ||
347 | |||
348 | class ftpwrapper(urllib.ftpwrapper): | ||
349 | # range support note: | ||
350 | # this ftpwrapper code is copied directly from | ||
351 | # urllib. The only enhancement is to add the rest | ||
352 | # argument and pass it on to ftp.ntransfercmd | ||
353 | def retrfile(self, file, type, rest=None): | ||
354 | self.endtransfer() | ||
355 | if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 | ||
356 | else: cmd = 'TYPE ' + type; isdir = 0 | ||
357 | try: | ||
358 | self.ftp.voidcmd(cmd) | ||
359 | except ftplib.all_errors: | ||
360 | self.init() | ||
361 | self.ftp.voidcmd(cmd) | ||
362 | conn = None | ||
363 | if file and not isdir: | ||
364 | # Use nlst to see if the file exists at all | ||
365 | try: | ||
366 | self.ftp.nlst(file) | ||
367 | except ftplib.error_perm, reason: | ||
368 | raise IOError, ('ftp error', reason), sys.exc_info()[2] | ||
369 | # Restore the transfer mode! | ||
370 | self.ftp.voidcmd(cmd) | ||
371 | # Try to retrieve as a file | ||
372 | try: | ||
373 | cmd = 'RETR ' + file | ||
374 | conn = self.ftp.ntransfercmd(cmd, rest) | ||
375 | except ftplib.error_perm, reason: | ||
376 | if str(reason)[:3] == '501': | ||
377 | # workaround for REST not supported error | ||
378 | fp, retrlen = self.retrfile(file, type) | ||
379 | fp = RangeableFileObject(fp, (rest,'')) | ||
380 | return (fp, retrlen) | ||
381 | elif str(reason)[:3] != '550': | ||
382 | raise IOError, ('ftp error', reason), sys.exc_info()[2] | ||
383 | if not conn: | ||
384 | # Set transfer mode to ASCII! | ||
385 | self.ftp.voidcmd('TYPE A') | ||
386 | # Try a directory listing | ||
387 | if file: cmd = 'LIST ' + file | ||
388 | else: cmd = 'LIST' | ||
389 | conn = self.ftp.ntransfercmd(cmd) | ||
390 | self.busy = 1 | ||
391 | # Pass back both a suitably decorated object and a retrieval length | ||
392 | return (addclosehook(conn[0].makefile('rb'), | ||
393 | self.endtransfer), conn[1]) | ||
394 | |||
395 | |||
396 | #################################################################### | ||
397 | # Range Tuple Functions | ||
398 | # XXX: These range tuple functions might go better in a class. | ||
399 | |||
400 | _rangere = None | ||
401 | def range_header_to_tuple(range_header): | ||
402 | """Get a (firstbyte,lastbyte) tuple from a Range header value. | ||
403 | |||
404 | Range headers have the form "bytes=<firstbyte>-<lastbyte>". This | ||
405 | function pulls the firstbyte and lastbyte values and returns | ||
406 | a (firstbyte,lastbyte) tuple. If lastbyte is not specified in | ||
407 | the header value, it is returned as an empty string in the | ||
408 | tuple. | ||
409 | |||
410 | Return None if range_header is None | ||
411 | Return () if range_header does not conform to the range spec | ||
412 | pattern. | ||
413 | |||
414 | """ | ||
415 | global _rangere | ||
416 | if range_header is None: return None | ||
417 | if _rangere is None: | ||
418 | import re | ||
419 | _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)') | ||
420 | match = _rangere.match(range_header) | ||
421 | if match: | ||
422 | tup = range_tuple_normalize(match.group(1,2)) | ||
423 | if tup and tup[1]: | ||
424 | tup = (tup[0],tup[1]+1) | ||
425 | return tup | ||
426 | return () | ||
427 | |||
428 | def range_tuple_to_header(range_tup): | ||
429 | """Convert a range tuple to a Range header value. | ||
430 | Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None | ||
431 | if no range is needed. | ||
432 | """ | ||
433 | if range_tup is None: return None | ||
434 | range_tup = range_tuple_normalize(range_tup) | ||
435 | if range_tup: | ||
436 | if range_tup[1]: | ||
437 | range_tup = (range_tup[0],range_tup[1] - 1) | ||
438 | return 'bytes=%s-%s' % range_tup | ||
439 | |||
440 | def range_tuple_normalize(range_tup): | ||
441 | """Normalize a (first_byte,last_byte) range tuple. | ||
442 | Return a tuple whose first element is guaranteed to be an int | ||
443 | and whose second element will be '' (meaning: the last byte) or | ||
444 | an int. Finally, return None if the normalized tuple == (0,'') | ||
445 | as that is equivalent to retrieving the entire file. | ||
446 | """ | ||
447 | if range_tup is None: return None | ||
448 | # handle first byte | ||
449 | fb = range_tup[0] | ||
450 | if fb in (None,''): fb = 0 | ||
451 | else: fb = int(fb) | ||
452 | # handle last byte | ||
453 | try: lb = range_tup[1] | ||
454 | except IndexError: lb = '' | ||
455 | else: | ||
456 | if lb is None: lb = '' | ||
457 | elif lb != '': lb = int(lb) | ||
458 | # check if range is over the entire file | ||
459 | if (fb,lb) == (0,''): return None | ||
460 | # check that the range is valid | ||
461 | if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb)) | ||
462 | return (fb,lb) | ||
463 | |||
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
deleted file mode 100644
index fefdab36f6..0000000000
--- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/grabber.py
+++ /dev/null
@@ -1,1477 +0,0 @@
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
19 | |||
20 | """A high-level cross-protocol url-grabber. | ||
21 | |||
22 | GENERAL ARGUMENTS (kwargs) | ||
23 | |||
24 | Where possible, the module-level default is indicated, and legal | ||
25 | values are provided. | ||
26 | |||
27 | copy_local = 0 [0|1] | ||
28 | |||
29 | ignored except for file:// urls, in which case it specifies | ||
30 | whether urlgrab should still make a copy of the file, or simply | ||
31 | point to the existing copy. The module level default for this | ||
32 | option is 0. | ||
33 | |||
34 | close_connection = 0 [0|1] | ||
35 | |||
36 | tells URLGrabber to close the connection after a file has been | ||
37 | transfered. This is ignored unless the download happens with the | ||
38 | http keepalive handler (keepalive=1). Otherwise, the connection | ||
39 | is left open for further use. The module level default for this | ||
40 | option is 0 (keepalive connections will not be closed). | ||
41 | |||
42 | keepalive = 1 [0|1] | ||
43 | |||
44 | specifies whether keepalive should be used for HTTP/1.1 servers | ||
45 | that support it. The module level default for this option is 1 | ||
46 | (keepalive is enabled). | ||
47 | |||
48 | progress_obj = None | ||
49 | |||
50 | a class instance that supports the following methods: | ||
51 | po.start(filename, url, basename, length, text) | ||
52 | # length will be None if unknown | ||
53 | po.update(read) # read == bytes read so far | ||
54 | po.end() | ||
55 | |||
56 | text = None | ||
57 | |||
58 | specifies an alternative text item at the beginning of the progress | ||
59 | bar line. If not given, the basename of the file is used. | ||
60 | |||
61 | throttle = 1.0 | ||
62 | |||
63 | a number - if it's an int, it's the bytes/second throttle limit. | ||
64 | If it's a float, it is first multiplied by bandwidth. If throttle | ||
65 | == 0, throttling is disabled. If None, the module-level default | ||
66 | (which can be set on default_grabber.throttle) is used. See | ||
67 | BANDWIDTH THROTTLING for more information. | ||
68 | |||
69 | timeout = None | ||
70 | |||
71 | a positive float expressing the number of seconds to wait for socket | ||
72 | operations. If the value is None or 0.0, socket operations will block | ||
73 | forever. Setting this option causes urlgrabber to call the settimeout | ||
74 | method on the Socket object used for the request. See the Python | ||
75 | documentation on settimeout for more information. | ||
76 | http://www.python.org/doc/current/lib/socket-objects.html | ||
77 | |||
78 | bandwidth = 0 | ||
79 | |||
80 | the nominal max bandwidth in bytes/second. If throttle is a float | ||
81 | and bandwidth == 0, throttling is disabled. If None, the | ||
82 | module-level default (which can be set on | ||
83 | default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for | ||
84 | more information. | ||
85 | |||
86 | range = None | ||
87 | |||
88 | a tuple of the form (first_byte, last_byte) describing a byte | ||
89 | range to retrieve. Either or both of the values may be set to | ||
90 | None. If first_byte is None, byte offset 0 is assumed. If | ||
91 | last_byte is None, the last byte available is assumed. Note that | ||
92 | the range specification is python-like in that (0,10) will yield | ||
93 | the first 10 bytes of the file. | ||
94 | |||
95 | If set to None, no range will be used. | ||
96 | |||
97 | reget = None [None|'simple'|'check_timestamp'] | ||
98 | |||
99 | whether to attempt to reget a partially-downloaded file. Reget | ||
100 | only applies to .urlgrab and (obviously) only if there is a | ||
101 | partially downloaded file. Reget has two modes: | ||
102 | |||
103 | 'simple' -- the local file will always be trusted. If there | ||
104 | are 100 bytes in the local file, then the download will always | ||
105 | begin 100 bytes into the requested file. | ||
106 | |||
107 | 'check_timestamp' -- the timestamp of the server file will be | ||
108 | compared to the timestamp of the local file. ONLY if the | ||
109 | local file is newer than or the same age as the server file | ||
110 | will reget be used. If the server file is newer, or the | ||
111 | timestamp is not returned, the entire file will be fetched. | ||
112 | |||
113 | NOTE: urlgrabber can do very little to verify that the partial | ||
114 | file on disk is identical to the beginning of the remote file. | ||
115 | You may want to either employ a custom "checkfunc" or simply avoid | ||
116 | using reget in situations where corruption is a concern. | ||
117 | |||
118 | user_agent = 'urlgrabber/VERSION' | ||
119 | |||
120 | a string, usually of the form 'AGENT/VERSION' that is provided to | ||
121 | HTTP servers in the User-agent header. The module level default | ||
122 | for this option is "urlgrabber/VERSION". | ||
123 | |||
124 | http_headers = None | ||
125 | |||
126 | a tuple of 2-tuples, each containing a header and value. These | ||
127 | will be used for http and https requests only. For example, you | ||
128 | can do | ||
129 | http_headers = (('Pragma', 'no-cache'),) | ||
130 | |||
131 | ftp_headers = None | ||
132 | |||
133 | this is just like http_headers, but will be used for ftp requests. | ||
134 | |||
135 | proxies = None | ||
136 | |||
137 | a dictionary that maps protocol schemes to proxy hosts. For | ||
138 | example, to use a proxy server on host "foo" port 3128 for http | ||
139 | and https URLs: | ||
140 | proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' } | ||
141 | note that proxy authentication information may be provided using | ||
142 | normal URL constructs: | ||
143 | proxies={ 'http' : 'http://user:password@foo:3128' } | ||
144 | Lastly, if proxies is None, the default environment settings will | ||
145 | be used. | ||
146 | |||
147 | prefix = None | ||
148 | |||
149 | a url prefix that will be prepended to all requested urls. For | ||
150 | example: | ||
151 | g = URLGrabber(prefix='http://foo.com/mirror/') | ||
152 | g.urlgrab('some/file.txt') | ||
153 | ## this will fetch 'http://foo.com/mirror/some/file.txt' | ||
154 | This option exists primarily to allow identical behavior to | ||
155 | MirrorGroup (and derived) instances. Note: a '/' will be inserted | ||
156 | if necessary, so you cannot specify a prefix that ends with a | ||
157 | partial file or directory name. | ||
158 | |||
159 | opener = None | ||
160 | |||
161 | Overrides the default urllib2.OpenerDirector provided to urllib2 | ||
162 | when making requests. This option exists so that the urllib2 | ||
163 | handler chain may be customized. Note that the range, reget, | ||
164 | proxy, and keepalive features require that custom handlers be | ||
165 | provided to urllib2 in order to function properly. If an opener | ||
166 | option is provided, no attempt is made by urlgrabber to ensure | ||
167 | chain integrity. You are responsible for ensuring that any | ||
168 | extension handlers are present if said features are required. | ||
169 | |||
170 | data = None | ||
171 | |||
172 | Only relevant for the HTTP family (and ignored for other | ||
173 | protocols), this allows HTTP POSTs. When the data kwarg is | ||
174 | present (and not None), an HTTP request will automatically become | ||
175 | a POST rather than GET. This is done by direct passthrough to | ||
176 | urllib2. If you use this, you may also want to set the | ||
177 | 'Content-length' and 'Content-type' headers with the http_headers | ||
178 | option. Note that python 2.2 handles the letter case of these | ||
179 | headers badly; if you do not use the proper case (shown here), your | ||
180 | values will be overridden with the defaults. | ||
181 | |||
182 | |||
183 | RETRY RELATED ARGUMENTS | ||
184 | |||
185 | retry = None | ||
186 | |||
187 | the number of times to retry the grab before bailing. If this is | ||
188 | zero, it will retry forever. This was intentional... really, it | ||
189 | was :). If this value is not supplied or is supplied but is None | ||
190 | retrying does not occur. | ||
191 | |||
192 | retrycodes = [-1,2,4,5,6,7] | ||
193 | |||
194 | a sequence of errorcodes (values of e.errno) for which it should | ||
195 | retry. See the doc on URLGrabError for more details on this. You | ||
196 | might consider modifying a copy of the default codes rather than | ||
197 | building yours from scratch so that if the list is extended in the | ||
198 | future (or one code is split into two) you can still enjoy the | ||
199 | benefits of the default list. You can do that with something like | ||
200 | this: | ||
201 | |||
202 | retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes | ||
203 | if 12 not in retrycodes: | ||
204 | retrycodes.append(12) | ||
205 | |||
206 | checkfunc = None | ||
207 | |||
208 | a function to do additional checks. This defaults to None, which | ||
209 | means no additional checking. The function should simply return | ||
210 | on a successful check. It should raise URLGrabError on an | ||
211 | unsuccessful check. Raising of any other exception will be | ||
212 | considered immediate failure and no retries will occur. | ||
213 | |||
214 | If it raises URLGrabError, the error code will determine the retry | ||
215 | behavior. Negative error numbers are reserved for use by these | ||
216 | passed in functions, so you can use many negative numbers for | ||
217 | different types of failure. By default, -1 results in a retry, | ||
218 | but this can be customized with retrycodes. | ||
219 | |||
220 | If you simply pass in a function, it will be given exactly one | ||
221 | argument: a CallbackObject instance with the .url attribute | ||
222 | defined and either .filename (for urlgrab) or .data (for urlread). | ||
223 | For urlgrab, .filename is the name of the local file. For | ||
224 | urlread, .data is the actual string data. If you need other | ||
225 | arguments passed to the callback (program state of some sort), you | ||
226 | can do so like this: | ||
227 | |||
228 | checkfunc=(function, ('arg1', 2), {'kwarg': 3}) | ||
229 | |||
230 | if the downloaded file has filename /tmp/stuff, then this will | ||
231 | result in this call (for urlgrab): | ||
232 | |||
233 | function(obj, 'arg1', 2, kwarg=3) | ||
234 | # obj.filename = '/tmp/stuff' | ||
235 | # obj.url = 'http://foo.com/stuff' | ||
236 | |||
237 | NOTE: both the "args" tuple and "kwargs" dict must be present if | ||
238 | you use this syntax, but either (or both) can be empty. | ||
239 | |||
240 | failure_callback = None | ||
241 | |||
242 | The callback that gets called during retries when an attempt to | ||
243 | fetch a file fails. The syntax for specifying the callback is | ||
244 | identical to checkfunc, except for the attributes defined in the | ||
245 | CallbackObject instance. The attributes for failure_callback are: | ||
246 | |||
247 | exception = the raised exception | ||
248 | url = the url we're trying to fetch | ||
249 | tries = the number of tries so far (including this one) | ||
250 | retry = the value of the retry option | ||
251 | |||
252 | The callback is present primarily to inform the calling program of | ||
253 | the failure, but if it raises an exception (including the one it's | ||
254 | passed) that exception will NOT be caught and will therefore cause | ||
255 | future retries to be aborted. | ||
256 | |||
257 | The callback is called for EVERY failure, including the last one. | ||
258 | On the last try, the callback can raise an alternate exception, | ||
259 | but it cannot (without severe trickiness) prevent the exception | ||
260 | from being raised. | ||
261 | |||
262 | interrupt_callback = None | ||
263 | |||
264 | This callback is called if KeyboardInterrupt is received at any | ||
265 | point in the transfer. Basically, this callback can have three | ||
266 | impacts on the fetch process based on the way it exits: | ||
267 | |||
268 | 1) raise no exception: the current fetch will be aborted, but | ||
269 | any further retries will still take place | ||
270 | |||
271 | 2) raise a URLGrabError: if you're using a MirrorGroup, then | ||
272 | this will prompt a failover to the next mirror according to | ||
273 | the behavior of the MirrorGroup subclass. It is recommended | ||
274 | that you raise URLGrabError with code 15, 'user abort'. If | ||
275 | you are NOT using a MirrorGroup subclass, then this is the | ||
276 | same as (3). | ||
277 | |||
278 | 3) raise some other exception (such as KeyboardInterrupt), which | ||
279 | will not be caught at either the grabber or mirror levels. | ||
280 | That is, it will be raised up all the way to the caller. | ||
281 | |||
282 | This callback is very similar to failure_callback. They are | ||
283 | passed the same arguments, so you could use the same function for | ||
284 | both. | ||
285 | |||
286 | urlparser = URLParser() | ||
287 | |||
288 | The URLParser class handles pre-processing of URLs, including | ||
289 | auth-handling for user/pass encoded in http urls, file handing | ||
290 | (that is, filenames not sent as a URL), and URL quoting. If you | ||
291 | want to override any of this behavior, you can pass in a | ||
292 | replacement instance. See also the 'quote' option. | ||
293 | |||
294 | quote = None | ||
295 | |||
296 | Whether or not to quote the path portion of a url. | ||
297 | quote = 1 -> quote the URLs (they're not quoted yet) | ||
298 | quote = 0 -> do not quote them (they're already quoted) | ||
299 | quote = None -> guess what to do | ||
300 | |||
301 | This option only affects proper urls like 'file:///etc/passwd'; it | ||
302 | does not affect 'raw' filenames like '/etc/passwd'. The latter | ||
303 | will always be quoted as they are converted to URLs. Also, only | ||
304 | the path part of a url is quoted. If you need more fine-grained | ||
305 | control, you should probably subclass URLParser and pass it in via | ||
306 | the 'urlparser' option. | ||
307 | |||
308 | BANDWIDTH THROTTLING | ||
309 | |||
310 | urlgrabber supports throttling via two values: throttle and | ||
311 | bandwidth. Between the two, you can either specify an absolute | ||
312 | throttle threshold or specify a threshold as a fraction of maximum | ||
313 | available bandwidth. | ||
314 | |||
315 | throttle is a number - if it's an int, it's the bytes/second | ||
316 | throttle limit. If it's a float, it is first multiplied by | ||
317 | bandwidth. If throttle == 0, throttling is disabled. If None, the | ||
318 | module-level default (which can be set with set_throttle) is used. | ||
319 | |||
320 | bandwidth is the nominal max bandwidth in bytes/second. If throttle | ||
321 | is a float and bandwidth == 0, throttling is disabled. If None, the | ||
322 | module-level default (which can be set with set_bandwidth) is used. | ||
323 | |||
324 | THROTTLING EXAMPLES: | ||
325 | |||
326 | Let's say you have a 100 Mbps connection. This is (about) 10^8 bits | ||
327 | per second, or 12,500,000 Bytes per second. You have a number of | ||
328 | throttling options: | ||
329 | |||
330 | *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float | ||
331 | |||
332 | This will limit urlgrab to use half of your available bandwidth. | ||
333 | |||
334 | *) set_throttle(6250000) # throttle is an int | ||
335 | |||
336 | This will also limit urlgrab to use half of your available | ||
337 | bandwidth, regardless of what bandwidth is set to. | ||
338 | |||
339 | *) set_bandwidth(6250000); set_throttle(1.0) # float | ||
340 | |||
341 | Use half your bandwidth | ||
342 | |||
343 | *) set_bandwidth(6250000); set_throttle(2.0) # float | ||
344 | |||
345 | Use up to 12,500,000 Bytes per second (your nominal max bandwidth) | ||
346 | |||
347 | *) set_bandwidth(6250000); set_throttle(0) # throttle = 0 | ||
348 | |||
349 | Disable throttling - this is more efficient than a very large | ||
350 | throttle setting. | ||
351 | |||
352 | *) set_bandwidth(0); set_throttle(1.0) # throttle is float, bandwidth = 0 | ||
353 | |||
354 | Disable throttling - this is the default when the module is loaded. | ||
355 | |||
356 | SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING) | ||
357 | |||
358 | While this is flexible, it's not extremely obvious to the user. I | ||
359 | suggest you implement a float throttle as a percent to make the | ||
360 | distinction between absolute and relative throttling very explicit. | ||
361 | |||
362 | Also, you may want to convert the units to something more convenient | ||
363 | than bytes/second, such as kbps or kB/s, etc. | ||
364 | |||
365 | """ | ||
366 | |||
367 | # $Id: grabber.py,v 1.48 2006/09/22 00:58:05 mstenner Exp $ | ||
368 | |||
369 | import os | ||
370 | import os.path | ||
371 | import sys | ||
372 | import urlparse | ||
373 | import rfc822 | ||
374 | import time | ||
375 | import string | ||
376 | import urllib | ||
377 | import urllib2 | ||
378 | from stat import * # S_* and ST_* | ||
379 | |||
380 | ######################################################################## | ||
381 | # MODULE INITIALIZATION | ||
382 | ######################################################################## | ||
383 | try: | ||
384 | exec('from ' + (__name__.split('.'))[0] + ' import __version__') | ||
385 | except: | ||
386 | __version__ = '???' | ||
387 | |||
388 | import sslfactory | ||
389 | |||
390 | auth_handler = urllib2.HTTPBasicAuthHandler( \ | ||
391 | urllib2.HTTPPasswordMgrWithDefaultRealm()) | ||
392 | |||
393 | try: | ||
394 | from i18n import _ | ||
395 | except ImportError, msg: | ||
396 | def _(st): return st | ||
397 | |||
398 | try: | ||
399 | from httplib import HTTPException | ||
400 | except ImportError, msg: | ||
401 | HTTPException = None | ||
402 | |||
403 | try: | ||
404 | # This is a convenient way to make keepalive optional. | ||
405 | # Just rename the module so it can't be imported. | ||
406 | import keepalive | ||
407 | from keepalive import HTTPHandler, HTTPSHandler | ||
408 | have_keepalive = True | ||
409 | except ImportError, msg: | ||
410 | have_keepalive = False | ||
411 | |||
412 | try: | ||
413 | # add in range support conditionally too | ||
414 | import byterange | ||
415 | from byterange import HTTPRangeHandler, HTTPSRangeHandler, \ | ||
416 | FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \ | ||
417 | range_tuple_to_header, RangeError | ||
418 | except ImportError, msg: | ||
419 | range_handlers = () | ||
420 | RangeError = None | ||
421 | have_range = 0 | ||
422 | else: | ||
423 | range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(), | ||
424 | FileRangeHandler(), FTPRangeHandler()) | ||
425 | have_range = 1 | ||
426 | |||
427 | |||
428 | # check whether socket timeout support is available (Python >= 2.3) | ||
429 | import socket | ||
430 | try: | ||
431 | TimeoutError = socket.timeout | ||
432 | have_socket_timeout = True | ||
433 | except AttributeError: | ||
434 | TimeoutError = None | ||
435 | have_socket_timeout = False | ||
436 | |||
437 | ######################################################################## | ||
438 | # functions for debugging output. These functions are here because they | ||
439 | # are also part of the module initialization. | ||
440 | DEBUG = None | ||
441 | def set_logger(DBOBJ): | ||
442 | """Set the DEBUG object. This is called by _init_default_logger when | ||
443 | the environment variable URLGRABBER_DEBUG is set, but can also be | ||
444 | called by a calling program. Basically, if the calling program uses | ||
445 | the logging module and would like to incorporate urlgrabber logging, | ||
446 | then it can do so this way. It's probably not necessary as most | ||
447 | internal logging is only for debugging purposes. | ||
448 | |||
449 | The passed-in object should be a logging.Logger instance. It will | ||
450 | be pushed into the keepalive and byterange modules if they're | ||
451 | being used. The mirror module pulls this object in on import, so | ||
452 | you will need to manually push into it. In fact, you may find it | ||
453 | tidier to simply push your logging object (or objects) into each | ||
454 | of these modules independently. | ||
455 | """ | ||
456 | |||
457 | global DEBUG | ||
458 | DEBUG = DBOBJ | ||
459 | if have_keepalive and keepalive.DEBUG is None: | ||
460 | keepalive.DEBUG = DBOBJ | ||
461 | if have_range and byterange.DEBUG is None: | ||
462 | byterange.DEBUG = DBOBJ | ||
463 | if sslfactory.DEBUG is None: | ||
464 | sslfactory.DEBUG = DBOBJ | ||
465 | |||
466 | def _init_default_logger(): | ||
467 | '''Examines the environment variable URLGRABBER_DEBUG and creates | ||
468 | a logging object (logging.logger) based on the contents. It takes | ||
469 | the form | ||
470 | |||
471 | URLGRABBER_DEBUG=level,filename | ||
472 | |||
473 | where "level" can be either an integer or a log level from the | ||
474 | logging module (DEBUG, INFO, etc). If the integer is zero or | ||
475 | less, logging will be disabled. Filename is the filename where | ||
476 | logs will be sent. If it is "-", then stdout will be used. If | ||
477 | the filename is empty or missing, stderr will be used. If the | ||
478 | variable cannot be processed or the logging module cannot be | ||
479 | imported (python < 2.3) then logging will be disabled. Here are | ||
480 | some examples: | ||
481 | |||
482 | URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt | ||
483 | URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout | ||
484 | URLGRABBER_DEBUG=INFO # log info and higher to stderr | ||
485 | |||
486 | This function is called during module initialization. It is not | ||
487 | intended to be called from outside. The only reason it is a | ||
488 | function at all is to keep the module-level namespace tidy and to | ||
489 | collect the code into a nice block.''' | ||
490 | |||
491 | try: | ||
492 | dbinfo = os.environ['URLGRABBER_DEBUG'].split(',') | ||
493 | import logging | ||
494 | level = logging._levelNames.get(dbinfo[0], int(dbinfo[0])) | ||
495 | if level < 1: raise ValueError() | ||
496 | |||
497 | formatter = logging.Formatter('%(asctime)s %(message)s') | ||
498 | if len(dbinfo) > 1: filename = dbinfo[1] | ||
499 | else: filename = '' | ||
500 | if filename == '': handler = logging.StreamHandler(sys.stderr) | ||
501 | elif filename == '-': handler = logging.StreamHandler(sys.stdout) | ||
502 | else: handler = logging.FileHandler(filename) | ||
503 | handler.setFormatter(formatter) | ||
504 | DBOBJ = logging.getLogger('urlgrabber') | ||
505 | DBOBJ.addHandler(handler) | ||
506 | DBOBJ.setLevel(level) | ||
507 | except (KeyError, ImportError, ValueError): | ||
508 | DBOBJ = None | ||
509 | set_logger(DBOBJ) | ||
510 | |||
511 | _init_default_logger() | ||
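Besides the URLGRABBER_DEBUG environment variable handled above, a host program can hand its own logger to set_logger() directly, as the set_logger docstring suggests. A minimal sketch, assuming the package is importable as urlgrabber and using an arbitrary logger name:

    import logging, sys
    import urlgrabber.grabber

    log = logging.getLogger('myapp.urlgrabber')   # name is arbitrary
    log.addHandler(logging.StreamHandler(sys.stderr))
    log.setLevel(logging.DEBUG)
    urlgrabber.grabber.set_logger(log)  # also pushed into keepalive/byterange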
512 | ######################################################################## | ||
513 | # END MODULE INITIALIZATION | ||
514 | ######################################################################## | ||
515 | |||
516 | |||
517 | |||
518 | class URLGrabError(IOError): | ||
519 | """ | ||
520 | URLGrabError error codes: | ||
521 | |||
522 | URLGrabber error codes (0 -- 255) | ||
523 | 0 - everything looks good (you should never see this) | ||
524 | 1 - malformed url | ||
525 | 2 - local file doesn't exist | ||
526 | 3 - request for non-file local file (dir, etc) | ||
527 | 4 - IOError on fetch | ||
528 | 5 - OSError on fetch | ||
529 | 6 - no content length header when we expected one | ||
530 | 7 - HTTPException | ||
531 | 8 - Exceeded read limit (for urlread) | ||
532 | 9 - Requested byte range not satisfiable. | ||
533 | 10 - Byte range requested, but range support unavailable | ||
534 | 11 - Illegal reget mode | ||
535 | 12 - Socket timeout | ||
536 | 13 - malformed proxy url | ||
537 | 14 - HTTPError (includes .code and .exception attributes) | ||
538 | 15 - user abort | ||
539 | |||
540 | MirrorGroup error codes (256 -- 511) | ||
541 | 256 - No more mirrors left to try | ||
542 | |||
543 | Custom (non-builtin) classes derived from MirrorGroup (512 -- 767) | ||
544 | [ this range reserved for application-specific error codes ] | ||
545 | |||
546 | Retry codes (< 0) | ||
547 | -1 - retry the download, unknown reason | ||
548 | |||
549 | Note: to test which group a code is in, you can simply do integer | ||
550 | division by 256: e.errno / 256 | ||
551 | |||
552 | Negative codes are reserved for use by functions passed in to | ||
553 | retrygrab with checkfunc. The value -1 is built in as a generic | ||
554 | retry code and is already included in the retrycodes list. | ||
555 | Therefore, you can create a custom check function that simply | ||
556 | returns -1 and the fetch will be re-tried. For more customized | ||
557 | retries, you can use other negative number and include them in | ||
558 | retry-codes. This is nice for outputting useful messages about | ||
559 | what failed. | ||
560 | |||
561 | You can use these error codes like so: | ||
562 | try: urlgrab(url) | ||
563 | except URLGrabError, e: | ||
564 | if e.errno == 3: ... | ||
565 | # or | ||
566 | print e.strerror | ||
567 | # or simply | ||
568 | print e #### print '[Errno %i] %s' % (e.errno, e.strerror) | ||
569 | """ | ||
570 | pass | ||
571 | |||
572 | class CallbackObject: | ||
573 | """Container for returned callback data. | ||
574 | |||
575 | This is currently a dummy class into which urlgrabber can stuff | ||
576 | information for passing to callbacks. This way, the prototype for | ||
577 | all callbacks is the same, regardless of the data that will be | ||
578 | passed back. Any function that accepts a callback function as an | ||
579 | argument SHOULD document what it will define in this object. | ||
580 | |||
581 | It is possible that this class will have some greater | ||
582 | functionality in the future. | ||
583 | """ | ||
584 | def __init__(self, **kwargs): | ||
585 | self.__dict__.update(kwargs) | ||
586 | |||
587 | def urlgrab(url, filename=None, **kwargs): | ||
588 | """grab the file at <url> and make a local copy at <filename> | ||
589 | If filename is None, the basename of the url is used. | ||
590 | urlgrab returns the filename of the local file, which may be different | ||
591 | from the passed-in filename if the copy_local kwarg == 0. | ||
592 | |||
593 | See module documentation for a description of possible kwargs. | ||
594 | """ | ||
595 | return default_grabber.urlgrab(url, filename, **kwargs) | ||
596 | |||
597 | def urlopen(url, **kwargs): | ||
598 | """open the url and return a file object | ||
599 | If a progress object or throttle specifications exist, then | ||
600 | a special file object will be returned that supports them. | ||
601 | The file object can be treated like any other file object. | ||
602 | |||
603 | See module documentation for a description of possible kwargs. | ||
604 | """ | ||
605 | return default_grabber.urlopen(url, **kwargs) | ||
606 | |||
607 | def urlread(url, limit=None, **kwargs): | ||
608 | """read the url into a string, up to 'limit' bytes | ||
609 | If the limit is exceeded, an exception will be thrown. Note that urlread | ||
610 | is NOT intended to be used as a way of saying "I want the first N bytes" | ||
611 | but rather 'read the whole file into memory, but don't use too much' | ||
612 | |||
613 | See module documentation for a description of possible kwargs. | ||
614 | """ | ||
615 | return default_grabber.urlread(url, limit, **kwargs) | ||
616 | |||
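A note on urlread's limit semantics: per the error-code table above, exceeding the limit raises URLGrabError with errno 8 ("Exceeded read limit"). A hedged sketch (URL and limit are placeholders):

    from urlgrabber.grabber import urlread, URLGrabError

    try:
        data = urlread('http://example.com/small.txt', limit=65536)
    except URLGrabError, e:
        if e.errno == 8:
            print 'remote file exceeded the 64 KiB read limit'
        else:
            raise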
617 | |||
618 | class URLParser: | ||
619 | """Process the URLs before passing them to urllib2. | ||
620 | |||
621 | This class does several things: | ||
622 | |||
623 | * add any prefix | ||
624 | * translate a "raw" file to a proper file: url | ||
625 | * handle any http or https auth that's encoded within the url | ||
626 | * quote the url | ||
627 | |||
628 | Only the "parse" method is called directly, and it calls sub-methods. | ||
629 | |||
630 | An instance of this class is held in the options object, which | ||
631 | means that it's easy to change the behavior by sub-classing and | ||
632 | passing the replacement in. It need only have a method like: | ||
633 | |||
634 | url, parts = urlparser.parse(url, opts) | ||
635 | """ | ||
636 | |||
637 | def parse(self, url, opts): | ||
638 | """parse the url and return the (modified) url and its parts | ||
639 | |||
640 | Note: a raw file WILL be quoted when it's converted to a URL. | ||
641 | However, other urls (ones which come with a proper scheme) may | ||
642 | or may not be quoted according to opts.quote | ||
643 | |||
644 | opts.quote = 1 --> quote it | ||
645 | opts.quote = 0 --> do not quote it | ||
646 | opts.quote = None --> guess | ||
647 | """ | ||
648 | quote = opts.quote | ||
649 | |||
650 | if opts.prefix: | ||
651 | url = self.add_prefix(url, opts.prefix) | ||
652 | |||
653 | parts = urlparse.urlparse(url) | ||
654 | (scheme, host, path, parm, query, frag) = parts | ||
655 | |||
656 | if not scheme or (len(scheme) == 1 and scheme in string.letters): | ||
657 | # if a scheme isn't specified, we guess that it's "file:" | ||
658 | if url[0] not in '/\\': url = os.path.abspath(url) | ||
659 | url = 'file:' + urllib.pathname2url(url) | ||
660 | parts = urlparse.urlparse(url) | ||
661 | quote = 0 # pathname2url quotes, so we won't do it again | ||
662 | |||
663 | if scheme in ['http', 'https']: | ||
664 | parts = self.process_http(parts) | ||
665 | |||
666 | if quote is None: | ||
667 | quote = self.guess_should_quote(parts) | ||
668 | if quote: | ||
669 | parts = self.quote(parts) | ||
670 | |||
671 | url = urlparse.urlunparse(parts) | ||
672 | return url, parts | ||
673 | |||
674 | def add_prefix(self, url, prefix): | ||
675 | if prefix[-1] == '/' or url[0] == '/': | ||
676 | url = prefix + url | ||
677 | else: | ||
678 | url = prefix + '/' + url | ||
679 | return url | ||
680 | |||
681 | def process_http(self, parts): | ||
682 | (scheme, host, path, parm, query, frag) = parts | ||
683 | |||
684 | if '@' in host and auth_handler: | ||
685 | try: | ||
686 | user_pass, host = host.split('@', 1) | ||
687 | if ':' not in user_pass: user_pass += ':' # default to empty password | ||
688 | user, password = user_pass.split(':', 1) | ||
689 | except ValueError, e: | ||
690 | raise URLGrabError(1, _('Bad URL: %s') % urlparse.urlunparse(parts)) | ||
691 | if DEBUG: DEBUG.info('adding HTTP auth: %s, XXXXXXXX', user) | ||
692 | auth_handler.add_password(None, host, user, password) | ||
693 | |||
694 | return (scheme, host, path, parm, query, frag) | ||
695 | |||
696 | def quote(self, parts): | ||
697 | """quote the URL | ||
698 | |||
699 | This method quotes ONLY the path part. If you need to quote | ||
700 | other parts, you should override this and pass in your derived | ||
701 | class. The other alternative is to quote other parts before | ||
702 | passing into urlgrabber. | ||
703 | """ | ||
704 | (scheme, host, path, parm, query, frag) = parts | ||
705 | path = urllib.quote(path) | ||
706 | return (scheme, host, path, parm, query, frag) | ||
707 | |||
708 | hexvals = '0123456789ABCDEF' | ||
709 | def guess_should_quote(self, parts): | ||
710 | """ | ||
711 | Guess whether we should quote a path. This amounts to | ||
712 | guessing whether it's already quoted. | ||
713 | |||
714 | find ' ' -> 1 | ||
715 | find '%' -> 1 | ||
716 | find '%XX' -> 0 | ||
717 | else -> 1 | ||
718 | """ | ||
719 | (scheme, host, path, parm, query, frag) = parts | ||
720 | if ' ' in path: | ||
721 | return 1 | ||
722 | ind = string.find(path, '%') | ||
723 | if ind > -1: | ||
724 | while ind > -1: | ||
725 | if len(path) < ind+3: | ||
726 | return 1 | ||
727 | code = path[ind+1:ind+3].upper() | ||
728 | if code[0] not in self.hexvals or \ | ||
729 | code[1] not in self.hexvals: | ||
730 | return 1 | ||
731 | ind = string.find(path, '%', ind+1) | ||
732 | return 0 | ||
733 | return 1 | ||
734 | |||
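A quick sketch of URLParser in isolation, following the behavior coded above: a raw filename is converted to a file: URL and always quoted, after which parse() skips re-quoting. The path is an assumption for illustration:

    from urlgrabber.grabber import URLParser, URLGrabberOptions

    parser = URLParser()
    opts = URLGrabberOptions()        # opts.quote defaults to None ("guess")
    url, parts = parser.parse('/etc/hosts', opts)
    # on POSIX, url comes back as 'file:/etc/hosts', already quoted;
    # parse() forces quote to 0 for raw filenames so the path is not
    # quoted a second time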
735 | class URLGrabberOptions: | ||
736 | """Class to ease kwargs handling.""" | ||
737 | |||
738 | def __init__(self, delegate=None, **kwargs): | ||
739 | """Initialize URLGrabberOptions object. | ||
740 | Set default values for all options and then update options specified | ||
741 | in kwargs. | ||
742 | """ | ||
743 | self.delegate = delegate | ||
744 | if delegate is None: | ||
745 | self._set_defaults() | ||
746 | self._set_attributes(**kwargs) | ||
747 | |||
748 | def __getattr__(self, name): | ||
749 | if self.delegate and hasattr(self.delegate, name): | ||
750 | return getattr(self.delegate, name) | ||
751 | raise AttributeError, name | ||
752 | |||
753 | def raw_throttle(self): | ||
754 | """Calculate raw throttle value from throttle and bandwidth | ||
755 | values. | ||
756 | """ | ||
757 | if self.throttle <= 0: | ||
758 | return 0 | ||
759 | elif type(self.throttle) == type(0): | ||
760 | return float(self.throttle) | ||
761 | else: # throttle is a float | ||
762 | return self.bandwidth * self.throttle | ||
763 | |||
764 | def derive(self, **kwargs): | ||
765 | """Create a derived URLGrabberOptions instance. | ||
766 | This method creates a new instance and overrides the | ||
767 | options specified in kwargs. | ||
768 | """ | ||
769 | return URLGrabberOptions(delegate=self, **kwargs) | ||
770 | |||
771 | def _set_attributes(self, **kwargs): | ||
772 | """Update object attributes with those provided in kwargs.""" | ||
773 | self.__dict__.update(kwargs) | ||
774 | if have_range and kwargs.has_key('range'): | ||
775 | # normalize the supplied range value | ||
776 | self.range = range_tuple_normalize(self.range) | ||
777 | if not self.reget in [None, 'simple', 'check_timestamp']: | ||
778 | raise URLGrabError(11, _('Illegal reget mode: %s') \ | ||
779 | % (self.reget, )) | ||
780 | |||
781 | def _set_defaults(self): | ||
782 | """Set all options to their default values. | ||
783 | When adding new options, make sure a default is | ||
784 | provided here. | ||
785 | """ | ||
786 | self.progress_obj = None | ||
787 | self.throttle = 1.0 | ||
788 | self.bandwidth = 0 | ||
789 | self.retry = None | ||
790 | self.retrycodes = [-1,2,4,5,6,7] | ||
791 | self.checkfunc = None | ||
792 | self.copy_local = 0 | ||
793 | self.close_connection = 0 | ||
794 | self.range = None | ||
795 | self.user_agent = 'urlgrabber/%s' % __version__ | ||
796 | self.keepalive = 1 | ||
797 | self.proxies = None | ||
798 | self.reget = None | ||
799 | self.failure_callback = None | ||
800 | self.interrupt_callback = None | ||
801 | self.prefix = None | ||
802 | self.opener = None | ||
803 | self.cache_openers = True | ||
804 | self.timeout = None | ||
805 | self.text = None | ||
806 | self.http_headers = None | ||
807 | self.ftp_headers = None | ||
808 | self.data = None | ||
809 | self.urlparser = URLParser() | ||
810 | self.quote = None | ||
811 | self.ssl_ca_cert = None | ||
812 | self.ssl_context = None | ||
813 | |||
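The throttle semantics and delegate lookup above work out as follows; a
minimal sketch, not from the original module, with hypothetical values:

    # float throttle => fraction of bandwidth; int throttle => absolute B/s
    base = URLGrabberOptions(throttle=0.5, bandwidth=10000)
    print base.raw_throttle()       # 5000.0 (0.5 * 10000)

    fast = base.derive(throttle=12500)
    print fast.raw_throttle()       # 12500.0 (int taken as absolute cap)
    print fast.bandwidth            # 10000, found via the delegate chain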
814 | class URLGrabber: | ||
815 | """Provides easy opening of URLs with a variety of options. | ||
816 | |||
817 | All options are specified as kwargs. Options may be specified when | ||
818 | the class is created and may be overridden on a per request basis. | ||
819 | |||
820 | New objects inherit default values from default_grabber. | ||
821 | """ | ||
822 | |||
823 | def __init__(self, **kwargs): | ||
824 | self.opts = URLGrabberOptions(**kwargs) | ||
825 | |||
826 | def _retry(self, opts, func, *args): | ||
827 | tries = 0 | ||
828 | while 1: | ||
829 | # there are only two ways out of this loop. The second has | ||
830 | # several "sub-ways" | ||
831 | # 1) via the return in the "try" block | ||
832 | # 2) by some exception being raised | ||
833 | # a) an exception is raised that we don't "except" | ||
834 | # b) a callback raises ANY exception | ||
835 | # c) we're not retry-ing or have run out of retries | ||
836 | # d) the URLGrabError code is not in retrycodes | ||
837 | # beware of infinite loops :) | ||
838 | tries = tries + 1 | ||
839 | exception = None | ||
840 | retrycode = None | ||
841 | callback = None | ||
842 | if DEBUG: DEBUG.info('attempt %i/%s: %s', | ||
843 | tries, opts.retry, args[0]) | ||
844 | try: | ||
845 | r = func(opts, *args) | ||
846 | if DEBUG: DEBUG.info('success') | ||
847 | return r | ||
848 | except URLGrabError, e: | ||
849 | exception = e | ||
850 | callback = opts.failure_callback | ||
851 | retrycode = e.errno | ||
852 | except KeyboardInterrupt, e: | ||
853 | exception = e | ||
854 | callback = opts.interrupt_callback | ||
855 | |||
856 | if DEBUG: DEBUG.info('exception: %s', exception) | ||
857 | if callback: | ||
858 | if DEBUG: DEBUG.info('calling callback: %s', callback) | ||
859 | cb_func, cb_args, cb_kwargs = self._make_callback(callback) | ||
860 | obj = CallbackObject(exception=exception, url=args[0], | ||
861 | tries=tries, retry=opts.retry) | ||
862 | cb_func(obj, *cb_args, **cb_kwargs) | ||
863 | |||
864 | if (opts.retry is None) or (tries == opts.retry): | ||
865 | if DEBUG: DEBUG.info('retries exceeded, re-raising') | ||
866 | raise | ||
867 | |||
868 | if (retrycode is not None) and (retrycode not in opts.retrycodes): | ||
869 | if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising', | ||
870 | retrycode, opts.retrycodes) | ||
871 | raise | ||
872 | |||
873 | def urlopen(self, url, **kwargs): | ||
874 | """open the url and return a file object | ||
875 | If a progress object or throttle value was specified when this | ||
876 | object was created, then a special file object will be | ||
877 | returned that supports them. The file object can be treated | ||
878 | like any other file object. | ||
879 | """ | ||
880 | opts = self.opts.derive(**kwargs) | ||
881 | (url,parts) = opts.urlparser.parse(url, opts) | ||
882 | def retryfunc(opts, url): | ||
883 | return URLGrabberFileObject(url, filename=None, opts=opts) | ||
884 | return self._retry(opts, retryfunc, url) | ||
885 | |||
886 | def urlgrab(self, url, filename=None, **kwargs): | ||
887 | """grab the file at <url> and make a local copy at <filename> | ||
888 | If filename is None, the basename of the url is used. | ||
889 | urlgrab returns the filename of the local file, which may be | ||
890 | different from the passed-in filename if copy_local == 0. | ||
891 | """ | ||
892 | opts = self.opts.derive(**kwargs) | ||
893 | (url,parts) = opts.urlparser.parse(url, opts) | ||
894 | (scheme, host, path, parm, query, frag) = parts | ||
895 | if filename is None: | ||
896 | filename = os.path.basename( urllib.unquote(path) ) | ||
897 | if scheme == 'file' and not opts.copy_local: | ||
898 | # just return the name of the local file - don't make a | ||
899 | # copy currently | ||
900 | path = urllib.url2pathname(path) | ||
901 | if host: | ||
902 | path = os.path.normpath('//' + host + path) | ||
903 | if not os.path.exists(path): | ||
904 | raise URLGrabError(2, | ||
905 | _('Local file does not exist: %s') % (path, )) | ||
906 | elif not os.path.isfile(path): | ||
907 | raise URLGrabError(3, | ||
908 | _('Not a normal file: %s') % (path, )) | ||
909 | elif not opts.range: | ||
910 | return path | ||
911 | |||
912 | def retryfunc(opts, url, filename): | ||
913 | fo = URLGrabberFileObject(url, filename, opts) | ||
914 | try: | ||
915 | fo._do_grab() | ||
916 | if opts.checkfunc is not None: | ||
917 | cb_func, cb_args, cb_kwargs = \ | ||
918 | self._make_callback(opts.checkfunc) | ||
919 | obj = CallbackObject() | ||
920 | obj.filename = filename | ||
921 | obj.url = url | ||
922 | cb_func(obj, *cb_args, **cb_kwargs) | ||
923 | finally: | ||
924 | fo.close() | ||
925 | return filename | ||
926 | |||
927 | return self._retry(opts, retryfunc, url, filename) | ||
928 | |||
929 | def urlread(self, url, limit=None, **kwargs): | ||
930 | """read the url into a string, up to 'limit' bytes | ||
931 | If the limit is exceeded, an exception will be thrown. Note | ||
932 | that urlread is NOT intended to be used as a way of saying | ||
933 | "I want the first N bytes" but rather 'read the whole file | ||
934 | into memory, but don't use too much' | ||
935 | """ | ||
936 | opts = self.opts.derive(**kwargs) | ||
937 | (url,parts) = opts.urlparser.parse(url, opts) | ||
938 | if limit is not None: | ||
939 | limit = limit + 1 | ||
940 | |||
941 | def retryfunc(opts, url, limit): | ||
942 | fo = URLGrabberFileObject(url, filename=None, opts=opts) | ||
943 | s = '' | ||
944 | try: | ||
945 | # this is an unfortunate thing. Some file-like objects | ||
946 | # have a default "limit" of None, while the built-in (real) | ||
947 | # file objects have -1. They each break the other, so for | ||
948 | # now, we just force the default if necessary. | ||
949 | if limit is None: s = fo.read() | ||
950 | else: s = fo.read(limit) | ||
951 | |||
952 | if opts.checkfunc is not None: | ||
953 | cb_func, cb_args, cb_kwargs = \ | ||
954 | self._make_callback(opts.checkfunc) | ||
955 | obj = CallbackObject() | ||
956 | obj.data = s | ||
957 | obj.url = url | ||
958 | cb_func(obj, *cb_args, **cb_kwargs) | ||
959 | finally: | ||
960 | fo.close() | ||
961 | return s | ||
962 | |||
963 | s = self._retry(opts, retryfunc, url, limit) | ||
964 | if limit and len(s) > limit: | ||
965 | raise URLGrabError(8, | ||
966 | _('Exceeded limit (%i): %s') % (limit, url)) | ||
967 | return s | ||
968 | |||
969 | def _make_callback(self, callback_obj): | ||
970 | if callable(callback_obj): | ||
971 | return callback_obj, (), {} | ||
972 | else: | ||
973 | return callback_obj | ||
974 | |||
975 | # create the default URLGrabber used by urlXXX functions. | ||
976 | # NOTE: actual defaults are set in URLGrabberOptions | ||
977 | default_grabber = URLGrabber() | ||
978 | |||
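A minimal usage sketch of the class above; the URL, filename, and callback
are hypothetical, not from the original module:

    def on_failure(cb_obj):
        # cb_obj carries .exception, .url, .tries and .retry (see _retry)
        print 'attempt %s/%s failed: %s' % (cb_obj.tries, cb_obj.retry,
                                            cb_obj.exception)

    g = URLGrabber(retry=3, retrycodes=[-1, 4, 5, 6, 7],
                   failure_callback=on_failure)
    local = g.urlgrab('http://example.com/some/file.txt',
                      filename='/tmp/file.txt')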
979 | class URLGrabberFileObject: | ||
980 | """This is a file-object wrapper that supports progress objects | ||
981 | and throttling. | ||
982 | |||
983 | This exists to solve the following problem: let's say you want to | ||
984 | drop-in replace a normal open with urlopen. You want to use a | ||
985 | progress meter and/or throttling, but how do you do that without | ||
986 | rewriting your code? Answer: urlopen will return a wrapped file | ||
987 | object that does the progress meter and/or throttling internally. | ||
988 | """ | ||
989 | |||
990 | def __init__(self, url, filename, opts): | ||
991 | self.url = url | ||
992 | self.filename = filename | ||
993 | self.opts = opts | ||
994 | self.fo = None | ||
995 | self._rbuf = '' | ||
996 | self._rbufsize = 1024*8 | ||
997 | self._ttime = time.time() | ||
998 | self._tsize = 0 | ||
999 | self._amount_read = 0 | ||
1000 | self._opener = None | ||
1001 | self._do_open() | ||
1002 | |||
1003 | def __getattr__(self, name): | ||
1004 | """This effectively allows us to wrap at the instance level. | ||
1005 | Any attribute not found in _this_ object will be searched for | ||
1006 | in self.fo. This includes methods.""" | ||
1007 | if hasattr(self.fo, name): | ||
1008 | return getattr(self.fo, name) | ||
1009 | raise AttributeError, name | ||
1010 | |||
1011 | def _get_opener(self): | ||
1012 | """Build a urllib2 OpenerDirector based on request options.""" | ||
1013 | if self.opts.opener: | ||
1014 | return self.opts.opener | ||
1015 | elif self._opener is None: | ||
1016 | handlers = [] | ||
1017 | need_keepalive_handler = (have_keepalive and self.opts.keepalive) | ||
1018 | need_range_handler = (range_handlers and \ | ||
1019 | (self.opts.range or self.opts.reget)) | ||
1020 | # if you specify a ProxyHandler when creating the opener | ||
1021 | # it _must_ come before all other handlers in the list or urllib2 | ||
1022 | # chokes. | ||
1023 | if self.opts.proxies: | ||
1024 | handlers.append( CachedProxyHandler(self.opts.proxies) ) | ||
1025 | |||
1026 | # ------------------------------------------------------- | ||
1027 | # OK, these next few lines are a serious kludge to get | ||
1028 | # around what I think is a bug in python 2.2's | ||
1029 | # urllib2. The basic idea is that default handlers | ||
1030 | # get applied first. If you override one (like a | ||
1031 | # proxy handler), then the default gets pulled, but | ||
1032 | # the replacement goes on the end. In the case of | ||
1033 | # proxies, this means the normal handler picks it up | ||
1034 | # first and the proxy isn't used. Now, this probably | ||
1035 | # only happened with ftp or non-keepalive http, so not | ||
1036 | # many folks saw it. The simple approach to fixing it | ||
1037 | # is just to make sure you override the other | ||
1038 | # conflicting defaults as well. I would LOVE to see | ||
1039 | # these go away or be dealt with more elegantly. The | ||
1040 | # problem isn't there after 2.2. -MDS 2005/02/24 | ||
1041 | if not need_keepalive_handler: | ||
1042 | handlers.append( urllib2.HTTPHandler() ) | ||
1043 | if not need_range_handler: | ||
1044 | handlers.append( urllib2.FTPHandler() ) | ||
1045 | # ------------------------------------------------------- | ||
1046 | |||
1047 | ssl_factory = sslfactory.get_factory(self.opts.ssl_ca_cert, | ||
1048 | self.opts.ssl_context) | ||
1049 | |||
1050 | if need_keepalive_handler: | ||
1051 | handlers.append(HTTPHandler()) | ||
1052 | handlers.append(HTTPSHandler(ssl_factory)) | ||
1053 | if need_range_handler: | ||
1054 | handlers.extend( range_handlers ) | ||
1055 | handlers.append( auth_handler ) | ||
1056 | if self.opts.cache_openers: | ||
1057 | self._opener = CachedOpenerDirector(ssl_factory, *handlers) | ||
1058 | else: | ||
1059 | self._opener = ssl_factory.create_opener(*handlers) | ||
1060 | # OK, I don't like to do this, but otherwise, we end up with | ||
1061 | # TWO user-agent headers. | ||
1062 | self._opener.addheaders = [] | ||
1063 | return self._opener | ||
1064 | |||
1065 | def _do_open(self): | ||
1066 | opener = self._get_opener() | ||
1067 | |||
1068 | req = urllib2.Request(self.url, self.opts.data) # build request object | ||
1069 | self._add_headers(req) # add misc headers that we need | ||
1070 | self._build_range(req) # take care of reget and byterange stuff | ||
1071 | |||
1072 | fo, hdr = self._make_request(req, opener) | ||
1073 | if self.reget_time and self.opts.reget == 'check_timestamp': | ||
1074 | # do this if we have a local file with known timestamp AND | ||
1075 | # we're in check_timestamp reget mode. | ||
1076 | fetch_again = 0 | ||
1077 | try: | ||
1078 | modified_tuple = hdr.getdate_tz('last-modified') | ||
1079 | modified_stamp = rfc822.mktime_tz(modified_tuple) | ||
1080 | if modified_stamp > self.reget_time: fetch_again = 1 | ||
1081 | except (TypeError,): | ||
1082 | fetch_again = 1 | ||
1083 | |||
1084 | if fetch_again: | ||
1085 | # the server version is newer than the (incomplete) local | ||
1086 | # version, so we should abandon the version we're getting | ||
1087 | # and fetch the whole thing again. | ||
1088 | fo.close() | ||
1089 | self.opts.reget = None | ||
1090 | del req.headers['Range'] | ||
1091 | self._build_range(req) | ||
1092 | fo, hdr = self._make_request(req, opener) | ||
1093 | |||
1094 | (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url) | ||
1095 | path = urllib.unquote(path) | ||
1096 | if not (self.opts.progress_obj or self.opts.raw_throttle() \ | ||
1097 | or self.opts.timeout): | ||
1098 | # if we're not using the progress_obj, throttling, or timeout | ||
1099 | # we can get a performance boost by going directly to | ||
1100 | # the underlying fileobject for reads. | ||
1101 | self.read = fo.read | ||
1102 | if hasattr(fo, 'readline'): | ||
1103 | self.readline = fo.readline | ||
1104 | elif self.opts.progress_obj: | ||
1105 | try: | ||
1106 | length = int(hdr['Content-Length']) | ||
1107 | length = length + self._amount_read # Account for regets | ||
1108 | except (KeyError, ValueError, TypeError): | ||
1109 | length = None | ||
1110 | |||
1111 | self.opts.progress_obj.start(str(self.filename), | ||
1112 | urllib.unquote(self.url), | ||
1113 | os.path.basename(path), | ||
1114 | length, text=self.opts.text) | ||
1115 | self.opts.progress_obj.update(0) | ||
1116 | (self.fo, self.hdr) = (fo, hdr) | ||
1117 | |||
1118 | def _add_headers(self, req): | ||
1119 | if self.opts.user_agent: | ||
1120 | req.add_header('User-agent', self.opts.user_agent) | ||
1121 | try: req_type = req.get_type() | ||
1122 | except ValueError: req_type = None | ||
1123 | if self.opts.http_headers and req_type in ('http', 'https'): | ||
1124 | for h, v in self.opts.http_headers: | ||
1125 | req.add_header(h, v) | ||
1126 | if self.opts.ftp_headers and req_type == 'ftp': | ||
1127 | for h, v in self.opts.ftp_headers: | ||
1128 | req.add_header(h, v) | ||
1129 | |||
1130 | def _build_range(self, req): | ||
1131 | self.reget_time = None | ||
1132 | self.append = 0 | ||
1133 | reget_length = 0 | ||
1134 | rt = None | ||
1135 | if have_range and self.opts.reget and type(self.filename) == type(''): | ||
1136 | # we have reget turned on and we're dumping to a file | ||
1137 | try: | ||
1138 | s = os.stat(self.filename) | ||
1139 | except OSError: | ||
1140 | pass | ||
1141 | else: | ||
1142 | self.reget_time = s[ST_MTIME] | ||
1143 | reget_length = s[ST_SIZE] | ||
1144 | |||
1145 | # Set initial length when regetting | ||
1146 | self._amount_read = reget_length | ||
1147 | |||
1148 | rt = reget_length, '' | ||
1149 | self.append = 1 | ||
1150 | |||
1151 | if self.opts.range: | ||
1152 | if not have_range: | ||
1153 | raise URLGrabError(10, _('Byte range requested but range '\ | ||
1154 | 'support unavailable')) | ||
1155 | rt = self.opts.range | ||
1156 | if rt[0]: rt = (rt[0] + reget_length, rt[1]) | ||
1157 | |||
1158 | if rt: | ||
1159 | header = range_tuple_to_header(rt) | ||
1160 | if header: req.add_header('Range', header) | ||
1161 | |||
1162 | def _make_request(self, req, opener): | ||
1163 | try: | ||
1164 | if have_socket_timeout and self.opts.timeout: | ||
1165 | old_to = socket.getdefaulttimeout() | ||
1166 | socket.setdefaulttimeout(self.opts.timeout) | ||
1167 | try: | ||
1168 | fo = opener.open(req) | ||
1169 | finally: | ||
1170 | socket.setdefaulttimeout(old_to) | ||
1171 | else: | ||
1172 | fo = opener.open(req) | ||
1173 | hdr = fo.info() | ||
1174 | except ValueError, e: | ||
1175 | raise URLGrabError(1, _('Bad URL: %s') % (e, )) | ||
1176 | except RangeError, e: | ||
1177 | raise URLGrabError(9, str(e)) | ||
1178 | except urllib2.HTTPError, e: | ||
1179 | new_e = URLGrabError(14, str(e)) | ||
1180 | new_e.code = e.code | ||
1181 | new_e.exception = e | ||
1182 | raise new_e | ||
1183 | except IOError, e: | ||
1184 | if hasattr(e, 'reason') and have_socket_timeout and \ | ||
1185 | isinstance(e.reason, TimeoutError): | ||
1186 | raise URLGrabError(12, _('Timeout: %s') % (e, )) | ||
1187 | else: | ||
1188 | raise URLGrabError(4, _('IOError: %s') % (e, )) | ||
1189 | except OSError, e: | ||
1190 | raise URLGrabError(5, _('OSError: %s') % (e, )) | ||
1191 | except HTTPException, e: | ||
1192 | raise URLGrabError(7, _('HTTP Exception (%s): %s') % \ | ||
1193 | (e.__class__.__name__, e)) | ||
1194 | else: | ||
1195 | return (fo, hdr) | ||
1196 | |||
1197 | def _do_grab(self): | ||
1198 | """dump the file to self.filename.""" | ||
1199 | if self.append: new_fo = open(self.filename, 'ab') | ||
1200 | else: new_fo = open(self.filename, 'wb') | ||
1201 | bs = 1024*8 | ||
1202 | size = 0 | ||
1203 | |||
1204 | block = self.read(bs) | ||
1205 | size = size + len(block) | ||
1206 | while block: | ||
1207 | new_fo.write(block) | ||
1208 | block = self.read(bs) | ||
1209 | size = size + len(block) | ||
1210 | |||
1211 | new_fo.close() | ||
1212 | try: | ||
1213 | modified_tuple = self.hdr.getdate_tz('last-modified') | ||
1214 | modified_stamp = rfc822.mktime_tz(modified_tuple) | ||
1215 | os.utime(self.filename, (modified_stamp, modified_stamp)) | ||
1216 | except (TypeError,), e: pass | ||
1217 | |||
1218 | return size | ||
1219 | |||
1220 | def _fill_buffer(self, amt=None): | ||
1221 | """fill the buffer to contain at least 'amt' bytes by reading | ||
1222 | from the underlying file object. If amt is None, then it will | ||
1223 | read until it gets nothing more. It updates the progress meter | ||
1224 | and throttles after every self._rbufsize bytes.""" | ||
1225 | # the _rbuf test is only in this first 'if' for speed. It's not | ||
1226 | # logically necessary | ||
1227 | if self._rbuf and amt is not None: | ||
1228 | L = len(self._rbuf) | ||
1229 | if amt > L: | ||
1230 | amt = amt - L | ||
1231 | else: | ||
1232 | return | ||
1233 | |||
1234 | # if we've made it here, then we don't have enough in the buffer | ||
1235 | # and we need to read more. | ||
1236 | |||
1237 | buf = [self._rbuf] | ||
1238 | bufsize = len(self._rbuf) | ||
1239 | while amt is None or amt: | ||
1240 | # first, delay if necessary for throttling reasons | ||
1241 | if self.opts.raw_throttle(): | ||
1242 | diff = self._tsize/self.opts.raw_throttle() - \ | ||
1243 | (time.time() - self._ttime) | ||
1244 | if diff > 0: time.sleep(diff) | ||
1245 | self._ttime = time.time() | ||
1246 | |||
1247 | # now read some data, up to self._rbufsize | ||
1248 | if amt is None: readamount = self._rbufsize | ||
1249 | else: readamount = min(amt, self._rbufsize) | ||
1250 | try: | ||
1251 | new = self.fo.read(readamount) | ||
1252 | except socket.error, e: | ||
1253 | raise URLGrabError(4, _('Socket Error: %s') % (e, )) | ||
1254 | except TimeoutError, e: | ||
1255 | raise URLGrabError(12, _('Timeout: %s') % (e, )) | ||
1256 | except IOError, e: | ||
1257 | raise URLGrabError(4, _('IOError: %s') %(e,)) | ||
1258 | newsize = len(new) | ||
1259 | if not newsize: break # no more to read | ||
1260 | |||
1261 | if amt: amt = amt - newsize | ||
1262 | buf.append(new) | ||
1263 | bufsize = bufsize + newsize | ||
1264 | self._tsize = newsize | ||
1265 | self._amount_read = self._amount_read + newsize | ||
1266 | if self.opts.progress_obj: | ||
1267 | self.opts.progress_obj.update(self._amount_read) | ||
1268 | |||
1269 | self._rbuf = string.join(buf, '') | ||
1270 | return | ||
1271 | |||
1272 | def read(self, amt=None): | ||
1273 | self._fill_buffer(amt) | ||
1274 | if amt is None: | ||
1275 | s, self._rbuf = self._rbuf, '' | ||
1276 | else: | ||
1277 | s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:] | ||
1278 | return s | ||
1279 | |||
1280 | def readline(self, limit=-1): | ||
1281 | i = string.find(self._rbuf, '\n') | ||
1282 | while i < 0 and not (0 < limit <= len(self._rbuf)): | ||
1283 | L = len(self._rbuf) | ||
1284 | self._fill_buffer(L + self._rbufsize) | ||
1285 | if not len(self._rbuf) > L: break | ||
1286 | i = string.find(self._rbuf, '\n', L) | ||
1287 | |||
1288 | if i < 0: i = len(self._rbuf) | ||
1289 | else: i = i+1 | ||
1290 | if 0 <= limit < len(self._rbuf): i = limit | ||
1291 | |||
1292 | s, self._rbuf = self._rbuf[:i], self._rbuf[i:] | ||
1293 | return s | ||
1294 | |||
1295 | def close(self): | ||
1296 | if self.opts.progress_obj: | ||
1297 | self.opts.progress_obj.end(self._amount_read) | ||
1298 | self.fo.close() | ||
1299 | if self.opts.close_connection: | ||
1300 | try: self.fo.close_connection() | ||
1301 | except: pass | ||
1302 | |||
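Because __getattr__ above forwards unknown attributes to the real file
object, the wrapper drops in wherever a plain file is expected. A sketch,
not from the original module, with a hypothetical URL:

    fo = default_grabber.urlopen('http://example.com/data.txt')
    try:
        first = fo.readline()   # buffered readline implemented above
        rest = fo.read()        # honors progress/throttling when enabled
    finally:
        fo.close()              # also finishes the progress meter, if any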
1303 | _handler_cache = [] | ||
1304 | def CachedOpenerDirector(ssl_factory = None, *handlers): | ||
1305 | for (cached_handlers, opener) in _handler_cache: | ||
1306 | if cached_handlers == handlers: | ||
1307 | for handler in opener.handlers: | ||
1308 | handler.add_parent(opener) | ||
1309 | return opener | ||
1310 | if not ssl_factory: | ||
1311 | ssl_factory = sslfactory.get_factory() | ||
1312 | opener = ssl_factory.create_opener(*handlers) | ||
1313 | _handler_cache.append( (handlers, opener) ) | ||
1314 | return opener | ||
1315 | |||
1316 | _proxy_cache = [] | ||
1317 | def CachedProxyHandler(proxies): | ||
1318 | for (pdict, handler) in _proxy_cache: | ||
1319 | if pdict == proxies: | ||
1320 | if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies) | ||
1321 | break | ||
1322 | else: | ||
1323 | for k, v in proxies.items(): | ||
1324 | utype, url = urllib.splittype(v) | ||
1325 | host, other = urllib.splithost(url) | ||
1326 | if (utype is None) or (host is None): | ||
1327 | raise URLGrabError(13, _('Bad proxy URL: %s') % v) | ||
1328 | |||
1329 | if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies) | ||
1330 | handler = urllib2.ProxyHandler(proxies) | ||
1331 | _proxy_cache.append( (proxies, handler) ) | ||
1332 | return handler | ||
1333 | |||
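The proxies dict accepted here (and via the 'proxies' option) follows the
urllib2 ProxyHandler convention of scheme -> proxy URL. A sketch with
hypothetical host and credentials:

    proxies = {'http': 'http://user:pass@proxy.example.com:3128/',
               'ftp': 'http://proxy.example.com:3128/'}
    handler = CachedProxyHandler(proxies)   # validated, then cached by dict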
1334 | ##################################################################### | ||
1335 | # DEPRECATED FUNCTIONS | ||
1336 | def set_throttle(new_throttle): | ||
1337 | """Deprecated. Use: default_grabber.throttle = new_throttle""" | ||
1338 | default_grabber.throttle = new_throttle | ||
1339 | |||
1340 | def set_bandwidth(new_bandwidth): | ||
1341 | """Deprecated. Use: default_grabber.bandwidth = new_bandwidth""" | ||
1342 | default_grabber.bandwidth = new_bandwidth | ||
1343 | |||
1344 | def set_progress_obj(new_progress_obj): | ||
1345 | """Deprecated. Use: default_grabber.progress_obj = new_progress_obj""" | ||
1346 | default_grabber.progress_obj = new_progress_obj | ||
1347 | |||
1348 | def set_user_agent(new_user_agent): | ||
1349 | """Deprecated. Use: default_grabber.user_agent = new_user_agent""" | ||
1350 | default_grabber.user_agent = new_user_agent | ||
1351 | |||
1352 | def retrygrab(url, filename=None, copy_local=0, close_connection=0, | ||
1353 | progress_obj=None, throttle=None, bandwidth=None, | ||
1354 | numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None): | ||
1355 | """Deprecated. Use: urlgrab() with the retry arg instead""" | ||
1356 | kwargs = {'copy_local' : copy_local, | ||
1357 | 'close_connection' : close_connection, | ||
1358 | 'progress_obj' : progress_obj, | ||
1359 | 'throttle' : throttle, | ||
1360 | 'bandwidth' : bandwidth, | ||
1361 | 'retry' : numtries, | ||
1362 | 'retrycodes' : retrycodes, | ||
1363 | 'checkfunc' : checkfunc | ||
1364 | } | ||
1365 | return urlgrab(url, filename, **kwargs) | ||
1366 | |||
1367 | |||
1368 | ##################################################################### | ||
1369 | # TESTING | ||
1370 | def _main_test(): | ||
1371 | import sys | ||
1372 | try: url, filename = sys.argv[1:3] | ||
1373 | except ValueError: | ||
1374 | print 'usage:', sys.argv[0], \ | ||
1375 | '<url> <filename> [copy_local=0|1] [close_connection=0|1]' | ||
1376 | sys.exit() | ||
1377 | |||
1378 | kwargs = {} | ||
1379 | for a in sys.argv[3:]: | ||
1380 | k, v = string.split(a, '=', 1) | ||
1381 | kwargs[k] = int(v) | ||
1382 | |||
1383 | set_throttle(1.0) | ||
1384 | set_bandwidth(32 * 1024) | ||
1385 | print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle, | ||
1386 | default_grabber.bandwidth) | ||
1387 | |||
1388 | try: from progress import text_progress_meter | ||
1389 | except ImportError, e: pass | ||
1390 | else: kwargs['progress_obj'] = text_progress_meter() | ||
1391 | |||
1392 | try: name = apply(urlgrab, (url, filename), kwargs) | ||
1393 | except URLGrabError, e: print e | ||
1394 | else: print 'LOCAL FILE:', name | ||
1395 | |||
1396 | |||
1397 | def _retry_test(): | ||
1398 | import sys | ||
1399 | try: url, filename = sys.argv[1:3] | ||
1400 | except ValueError: | ||
1401 | print 'usage:', sys.argv[0], \ | ||
1402 | '<url> <filename> [copy_local=0|1] [close_connection=0|1]' | ||
1403 | sys.exit() | ||
1404 | |||
1405 | kwargs = {} | ||
1406 | for a in sys.argv[3:]: | ||
1407 | k, v = string.split(a, '=', 1) | ||
1408 | kwargs[k] = int(v) | ||
1409 | |||
1410 | try: from progress import text_progress_meter | ||
1411 | except ImportError, e: pass | ||
1412 | else: kwargs['progress_obj'] = text_progress_meter() | ||
1413 | |||
1414 | def cfunc(filename, hello, there='foo'): | ||
1415 | print hello, there | ||
1416 | import random | ||
1417 | rnum = random.random() | ||
1418 | if rnum < .5: | ||
1419 | print 'forcing retry' | ||
1420 | raise URLGrabError(-1, 'forcing retry') | ||
1421 | if rnum < .75: | ||
1422 | print 'forcing failure' | ||
1423 | raise URLGrabError(-2, 'forcing immediate failure') | ||
1424 | print 'success' | ||
1425 | return | ||
1426 | |||
1427 | kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'}) | ||
1428 | try: name = apply(retrygrab, (url, filename), kwargs) | ||
1429 | except URLGrabError, e: print e | ||
1430 | else: print 'LOCAL FILE:', name | ||
1431 | |||
1432 | def _file_object_test(filename=None): | ||
1433 | import random, cStringIO, sys | ||
1434 | if filename is None: | ||
1435 | filename = __file__ | ||
1436 | print 'using file "%s" for comparisons' % filename | ||
1437 | fo = open(filename) | ||
1438 | s_input = fo.read() | ||
1439 | fo.close() | ||
1440 | |||
1441 | for testfunc in [_test_file_object_smallread, | ||
1442 | _test_file_object_readall, | ||
1443 | _test_file_object_readline, | ||
1444 | _test_file_object_readlines]: | ||
1445 | fo_input = cStringIO.StringIO(s_input) | ||
1446 | fo_output = cStringIO.StringIO() | ||
1447 | wrapper = URLGrabberFileObject(fo_input, None, 0) # NB: opts=0 predates URLGrabberOptions; __init__ above opens eagerly and would reject it | ||
1448 | print 'testing %-30s ' % testfunc.__name__, | ||
1449 | testfunc(wrapper, fo_output) | ||
1450 | s_output = fo_output.getvalue() | ||
1451 | if s_output == s_input: print 'passed' | ||
1452 | else: print 'FAILED' | ||
1453 | |||
1454 | def _test_file_object_smallread(wrapper, fo_output): | ||
1455 | while 1: | ||
1456 | s = wrapper.read(23) | ||
1457 | fo_output.write(s) | ||
1458 | if not s: return | ||
1459 | |||
1460 | def _test_file_object_readall(wrapper, fo_output): | ||
1461 | s = wrapper.read() | ||
1462 | fo_output.write(s) | ||
1463 | |||
1464 | def _test_file_object_readline(wrapper, fo_output): | ||
1465 | while 1: | ||
1466 | s = wrapper.readline() | ||
1467 | fo_output.write(s) | ||
1468 | if not s: return | ||
1469 | |||
1470 | def _test_file_object_readlines(wrapper, fo_output): | ||
1471 | li = wrapper.readlines() | ||
1472 | fo_output.write(string.join(li, '')) | ||
1473 | |||
1474 | if __name__ == '__main__': | ||
1475 | _main_test() | ||
1476 | _retry_test() | ||
1477 | _file_object_test('test') | ||
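The harness above is driven from the command line; assuming the module is
run directly (URL and paths hypothetical), an invocation looks like:

    python grabber.py http://example.com/somefile /tmp/somefile copy_local=1

Any extra key=value arguments are parsed as integers by _main_test and
_retry_test.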
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py deleted file mode 100644 index 71393e2b8d..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py +++ /dev/null | |||
@@ -1,617 +0,0 @@ | |||
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
19 | |||
20 | """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. | ||
21 | |||
22 | >>> import urllib2 | ||
23 | >>> from keepalive import HTTPHandler | ||
24 | >>> keepalive_handler = HTTPHandler() | ||
25 | >>> opener = urllib2.build_opener(keepalive_handler) | ||
26 | >>> urllib2.install_opener(opener) | ||
27 | >>> | ||
28 | >>> fo = urllib2.urlopen('http://www.python.org') | ||
29 | |||
30 | If a connection to a given host is requested, and all of the existing | ||
31 | connections are still in use, another connection will be opened. If | ||
32 | the handler tries to use an existing connection but it fails in some | ||
33 | way, it will be closed and removed from the pool. | ||
34 | |||
35 | To remove the handler, simply re-run build_opener with no arguments, and | ||
36 | install that opener. | ||
37 | |||
38 | You can explicitly close connections by using the close_connection() | ||
39 | method of the returned file-like object (described below) or you can | ||
40 | use the handler methods: | ||
41 | |||
42 | close_connection(host) | ||
43 | close_all() | ||
44 | open_connections() | ||
45 | |||
46 | NOTE: using the close_connection and close_all methods of the handler | ||
47 | should be done with care when using multiple threads. | ||
48 | * there is nothing that prevents another thread from creating new | ||
49 | connections immediately after connections are closed | ||
50 | * no checks are done to prevent in-use connections from being closed | ||
51 | |||
52 | >>> keepalive_handler.close_all() | ||
53 | |||
54 | EXTRA ATTRIBUTES AND METHODS | ||
55 | |||
56 | Upon a status of 200, the object returned has a few additional | ||
57 | attributes and methods, which should not be used if you want to | ||
58 | remain consistent with the normal urllib2-returned objects: | ||
59 | |||
60 | close_connection() - close the connection to the host | ||
61 | readlines() - you know, readlines() | ||
62 | status - the return status (e.g. 404) | ||
63 | reason - English translation of status (e.g. 'File not found') | ||
64 | |||
65 | If you want the best of both worlds, use this inside an | ||
66 | AttributeError-catching try: | ||
67 | |||
68 | >>> try: status = fo.status | ||
69 | >>> except AttributeError: status = None | ||
70 | |||
71 | Unfortunately, these are ONLY there if status == 200, so it's not | ||
72 | easy to distinguish between non-200 responses. The reason is that | ||
73 | urllib2 tries to do clever things with error codes 301, 302, 401, | ||
74 | and 407, and it wraps the object upon return. | ||
75 | |||
76 | For python versions earlier than 2.4, you can avoid this fancy error | ||
77 | handling by setting the module-level global HANDLE_ERRORS to zero. | ||
78 | You see, prior to 2.4, it's the HTTP Handler's job to determine what | ||
79 | to handle specially, and what to just pass up. HANDLE_ERRORS == 0 | ||
80 | means "pass everything up". In python 2.4, however, this job no | ||
81 | longer belongs to the HTTP Handler and is now done by a NEW handler, | ||
82 | HTTPErrorProcessor. Here's the bottom line: | ||
83 | |||
84 | python version < 2.4 | ||
85 | HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as | ||
86 | errors | ||
87 | HANDLE_ERRORS == 0 pass everything up, error processing is | ||
88 | left to the calling code | ||
89 | python version >= 2.4 | ||
90 | HANDLE_ERRORS == 1 pass up 200, treat the rest as errors | ||
91 | HANDLE_ERRORS == 0 (default) pass everything up, let the | ||
92 | other handlers (specifically, | ||
93 | HTTPErrorProcessor) decide what to do | ||
94 | |||
95 | In practice, setting the variable either way makes little difference | ||
96 | in python 2.4, so for the most consistent behavior across versions, | ||
97 | you probably just want to use the defaults, which will give you | ||
98 | exceptions on errors. | ||
99 | |||
100 | """ | ||
101 | |||
102 | # $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $ | ||
103 | |||
104 | import urllib2 | ||
105 | import httplib | ||
106 | import socket | ||
107 | import thread | ||
108 | |||
109 | DEBUG = None | ||
110 | |||
111 | import sslfactory | ||
112 | |||
113 | import sys | ||
114 | if sys.version_info < (2, 4): HANDLE_ERRORS = 1 | ||
115 | else: HANDLE_ERRORS = 0 | ||
116 | |||
117 | class ConnectionManager: | ||
118 | """ | ||
119 | The connection manager must be able to: | ||
120 | * keep track of all existing connections | ||
121 | """ | ||
122 | def __init__(self): | ||
123 | self._lock = thread.allocate_lock() | ||
124 | self._hostmap = {} # map hosts to a list of connections | ||
125 | self._connmap = {} # map connections to host | ||
126 | self._readymap = {} # map connection to ready state | ||
127 | |||
128 | def add(self, host, connection, ready): | ||
129 | self._lock.acquire() | ||
130 | try: | ||
131 | if not self._hostmap.has_key(host): self._hostmap[host] = [] | ||
132 | self._hostmap[host].append(connection) | ||
133 | self._connmap[connection] = host | ||
134 | self._readymap[connection] = ready | ||
135 | finally: | ||
136 | self._lock.release() | ||
137 | |||
138 | def remove(self, connection): | ||
139 | self._lock.acquire() | ||
140 | try: | ||
141 | try: | ||
142 | host = self._connmap[connection] | ||
143 | except KeyError: | ||
144 | pass | ||
145 | else: | ||
146 | del self._connmap[connection] | ||
147 | del self._readymap[connection] | ||
148 | self._hostmap[host].remove(connection) | ||
149 | if not self._hostmap[host]: del self._hostmap[host] | ||
150 | finally: | ||
151 | self._lock.release() | ||
152 | |||
153 | def set_ready(self, connection, ready): | ||
154 | try: self._readymap[connection] = ready | ||
155 | except KeyError: pass | ||
156 | |||
157 | def get_ready_conn(self, host): | ||
158 | conn = None | ||
159 | self._lock.acquire() | ||
160 | try: | ||
161 | if self._hostmap.has_key(host): | ||
162 | for c in self._hostmap[host]: | ||
163 | if self._readymap[c]: | ||
164 | self._readymap[c] = 0 | ||
165 | conn = c | ||
166 | break | ||
167 | finally: | ||
168 | self._lock.release() | ||
169 | return conn | ||
170 | |||
171 | def get_all(self, host=None): | ||
172 | if host: | ||
173 | return list(self._hostmap.get(host, [])) | ||
174 | else: | ||
175 | return dict(self._hostmap) | ||
176 | |||
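The bookkeeping contract of the class above, as a minimal sketch not from
the original module; any hashable object works as the connection:

    cm = ConnectionManager()
    conn = object()
    cm.add('example.com:80', conn, ready=1)
    assert cm.get_ready_conn('example.com:80') is conn  # now marked busy
    assert cm.get_ready_conn('example.com:80') is None  # nothing left ready
    cm.set_ready(conn, 1)                               # hand it back
    cm.remove(conn)                                     # forget it entirely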
177 | class KeepAliveHandler: | ||
178 | def __init__(self): | ||
179 | self._cm = ConnectionManager() | ||
180 | |||
181 | #### Connection Management | ||
182 | def open_connections(self): | ||
183 | """return a list of connected hosts and the number of connections | ||
184 | to each. [('foo.com:80', 2), ('bar.org', 1)]""" | ||
185 | return [(host, len(li)) for (host, li) in self._cm.get_all().items()] | ||
186 | |||
187 | def close_connection(self, host): | ||
188 | """close connection(s) to <host> | ||
189 | host is the host:port spec, exactly as passed in (e.g. 'www.cnn.com:8080'). | ||
190 | no error occurs if there is no connection to that host.""" | ||
191 | for h in self._cm.get_all(host): | ||
192 | self._cm.remove(h) | ||
193 | h.close() | ||
194 | |||
195 | def close_all(self): | ||
196 | """close all open connections""" | ||
197 | for host, conns in self._cm.get_all().items(): | ||
198 | for h in conns: | ||
199 | self._cm.remove(h) | ||
200 | h.close() | ||
201 | |||
202 | def _request_closed(self, request, host, connection): | ||
203 | """tells us that this request is now closed and the the | ||
204 | connection is ready for another request""" | ||
205 | self._cm.set_ready(connection, 1) | ||
206 | |||
207 | def _remove_connection(self, host, connection, close=0): | ||
208 | if close: connection.close() | ||
209 | self._cm.remove(connection) | ||
210 | |||
211 | #### Transaction Execution | ||
212 | def do_open(self, req): | ||
213 | host = req.get_host() | ||
214 | if not host: | ||
215 | raise urllib2.URLError('no host given') | ||
216 | |||
217 | try: | ||
218 | h = self._cm.get_ready_conn(host) | ||
219 | while h: | ||
220 | r = self._reuse_connection(h, req, host) | ||
221 | |||
222 | # if this response is non-None, then it worked and we're | ||
223 | # done. Break out, skipping the else block. | ||
224 | if r: break | ||
225 | |||
226 | # connection is bad - possibly closed by server | ||
227 | # discard it and ask for the next free connection | ||
228 | h.close() | ||
229 | self._cm.remove(h) | ||
230 | h = self._cm.get_ready_conn(host) | ||
231 | else: | ||
232 | # no (working) free connections were found. Create a new one. | ||
233 | h = self._get_connection(host) | ||
234 | if DEBUG: DEBUG.info("creating new connection to %s (%d)", | ||
235 | host, id(h)) | ||
236 | self._cm.add(host, h, 0) | ||
237 | self._start_transaction(h, req) | ||
238 | r = h.getresponse() | ||
239 | except (socket.error, httplib.HTTPException), err: | ||
240 | raise urllib2.URLError(err) | ||
241 | |||
242 | # if not a persistent connection, don't try to reuse it | ||
243 | if r.will_close: self._cm.remove(h) | ||
244 | |||
245 | if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason) | ||
246 | r._handler = self | ||
247 | r._host = host | ||
248 | r._url = req.get_full_url() | ||
249 | r._connection = h | ||
250 | r.code = r.status | ||
251 | r.headers = r.msg | ||
252 | r.msg = r.reason | ||
253 | |||
254 | if r.status == 200 or not HANDLE_ERRORS: | ||
255 | return r | ||
256 | else: | ||
257 | return self.parent.error('http', req, r, | ||
258 | r.status, r.msg, r.headers) | ||
259 | |||
260 | def _reuse_connection(self, h, req, host): | ||
261 | """start the transaction with a re-used connection | ||
262 | return a response object (r) upon success or None on failure. | ||
263 | This DOES not close or remove bad connections in cases where | ||
264 | it returns. However, if an unexpected exception occurs, it | ||
265 | will close and remove the connection before re-raising. | ||
266 | """ | ||
267 | try: | ||
268 | self._start_transaction(h, req) | ||
269 | r = h.getresponse() | ||
270 | # note: just because we got something back doesn't mean it | ||
271 | # worked. We'll check the version below, too. | ||
272 | except (socket.error, httplib.HTTPException): | ||
273 | r = None | ||
274 | except: | ||
275 | # adding this block just in case we've missed | ||
276 | # something. We will still raise the exception, but | ||
277 | # let's try to close the connection and remove it | ||
278 | # first. We previously got into a nasty loop | ||
279 | # where an exception was uncaught, and so the | ||
280 | # connection stayed open. On the next try, the | ||
281 | # same exception was raised, etc. The tradeoff is | ||
282 | # that it's now possible this call will raise | ||
283 | # a DIFFERENT exception | ||
284 | if DEBUG: DEBUG.error("unexpected exception - closing " + \ | ||
285 | "connection to %s (%d)", host, id(h)) | ||
286 | self._cm.remove(h) | ||
287 | h.close() | ||
288 | raise | ||
289 | |||
290 | if r is None or r.version == 9: | ||
291 | # httplib falls back to assuming HTTP 0.9 if it gets a | ||
292 | # bad header back. This is most likely to happen if | ||
293 | # the socket has been closed by the server since we | ||
294 | # last used the connection. | ||
295 | if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)", | ||
296 | host, id(h)) | ||
297 | r = None | ||
298 | else: | ||
299 | if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h)) | ||
300 | |||
301 | return r | ||
302 | |||
303 | def _start_transaction(self, h, req): | ||
304 | try: | ||
305 | if req.has_data(): | ||
306 | data = req.get_data() | ||
307 | h.putrequest('POST', req.get_selector()) | ||
308 | if not req.headers.has_key('Content-type'): | ||
309 | h.putheader('Content-type', | ||
310 | 'application/x-www-form-urlencoded') | ||
311 | if not req.headers.has_key('Content-length'): | ||
312 | h.putheader('Content-length', '%d' % len(data)) | ||
313 | else: | ||
314 | h.putrequest('GET', req.get_selector()) | ||
315 | except (socket.error, httplib.HTTPException), err: | ||
316 | raise urllib2.URLError(err) | ||
317 | |||
318 | for args in self.parent.addheaders: | ||
319 | h.putheader(*args) | ||
320 | for k, v in req.headers.items(): | ||
321 | h.putheader(k, v) | ||
322 | h.endheaders() | ||
323 | if req.has_data(): | ||
324 | h.send(data) | ||
325 | |||
326 | def _get_connection(self, host): | ||
327 | raise NotImplementedError | ||
328 | |||
329 | class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler): | ||
330 | def __init__(self): | ||
331 | KeepAliveHandler.__init__(self) | ||
332 | |||
333 | def http_open(self, req): | ||
334 | return self.do_open(req) | ||
335 | |||
336 | def _get_connection(self, host): | ||
337 | return HTTPConnection(host) | ||
338 | |||
339 | class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler): | ||
340 | def __init__(self, ssl_factory=None): | ||
341 | KeepAliveHandler.__init__(self) | ||
342 | if not ssl_factory: | ||
343 | ssl_factory = sslfactory.get_factory() | ||
344 | self._ssl_factory = ssl_factory | ||
345 | |||
346 | def https_open(self, req): | ||
347 | return self.do_open(req) | ||
348 | |||
349 | def _get_connection(self, host): | ||
350 | return self._ssl_factory.get_https_connection(host) | ||
351 | |||
352 | class HTTPResponse(httplib.HTTPResponse): | ||
353 | # we need to subclass HTTPResponse in order to | ||
354 | # 1) add readline() and readlines() methods | ||
355 | # 2) add close_connection() methods | ||
356 | # 3) add info() and geturl() methods | ||
357 | |||
358 | # in order to add readline(), read must be modified to deal with a | ||
359 | # buffer. example: readline must read a buffer and then spit back | ||
360 | # one line at a time. The only real alternative is to read one | ||
361 | # BYTE at a time (ick). Once something has been read, it can't be | ||
362 | # put back (ok, maybe it can, but that's even uglier than this), | ||
363 | # so if you THEN do a normal read, you must first take stuff from | ||
364 | # the buffer. | ||
365 | |||
366 | # the read method wraps the original to accommodate buffering, | ||
367 | # although read() never adds to the buffer. | ||
368 | # Both readline and readlines have been stolen with almost no | ||
369 | # modification from socket.py | ||
370 | |||
371 | |||
372 | def __init__(self, sock, debuglevel=0, strict=0, method=None): | ||
373 | if method: # the httplib in python 2.3 uses the method arg | ||
374 | httplib.HTTPResponse.__init__(self, sock, debuglevel, method) | ||
375 | else: # 2.2 doesn't | ||
376 | httplib.HTTPResponse.__init__(self, sock, debuglevel) | ||
377 | self.fileno = sock.fileno | ||
378 | self.code = None | ||
379 | self._rbuf = '' | ||
380 | self._rbufsize = 8096 | ||
381 | self._handler = None # inserted by the handler later | ||
382 | self._host = None # (same) | ||
383 | self._url = None # (same) | ||
384 | self._connection = None # (same) | ||
385 | |||
386 | _raw_read = httplib.HTTPResponse.read | ||
387 | |||
388 | def close(self): | ||
389 | if self.fp: | ||
390 | self.fp.close() | ||
391 | self.fp = None | ||
392 | if self._handler: | ||
393 | self._handler._request_closed(self, self._host, | ||
394 | self._connection) | ||
395 | |||
396 | def close_connection(self): | ||
397 | self._handler._remove_connection(self._host, self._connection, close=1) | ||
398 | self.close() | ||
399 | |||
400 | def info(self): | ||
401 | return self.headers | ||
402 | |||
403 | def geturl(self): | ||
404 | return self._url | ||
405 | |||
406 | def read(self, amt=None): | ||
407 | # the _rbuf test is only in this first if for speed. It's not | ||
408 | # logically necessary | ||
409 | if self._rbuf and amt is not None: | ||
410 | L = len(self._rbuf) | ||
411 | if amt > L: | ||
412 | amt -= L | ||
413 | else: | ||
414 | s = self._rbuf[:amt] | ||
415 | self._rbuf = self._rbuf[amt:] | ||
416 | return s | ||
417 | |||
418 | s = self._rbuf + self._raw_read(amt) | ||
419 | self._rbuf = '' | ||
420 | return s | ||
421 | |||
422 | def readline(self, limit=-1): | ||
423 | data = "" | ||
424 | i = self._rbuf.find('\n') | ||
425 | while i < 0 and not (0 < limit <= len(self._rbuf)): | ||
426 | new = self._raw_read(self._rbufsize) | ||
427 | if not new: break | ||
428 | i = new.find('\n') | ||
429 | if i >= 0: i = i + len(self._rbuf) | ||
430 | self._rbuf = self._rbuf + new | ||
431 | if i < 0: i = len(self._rbuf) | ||
432 | else: i = i+1 | ||
433 | if 0 <= limit < len(self._rbuf): i = limit | ||
434 | data, self._rbuf = self._rbuf[:i], self._rbuf[i:] | ||
435 | return data | ||
436 | |||
437 | def readlines(self, sizehint = 0): | ||
438 | total = 0 | ||
439 | lines = [] | ||
440 | while 1: | ||
441 | line = self.readline() | ||
442 | if not line: break | ||
443 | lines.append(line) | ||
444 | total += len(line) | ||
445 | if sizehint and total >= sizehint: | ||
446 | break | ||
447 | return lines | ||
448 | |||
449 | |||
450 | class HTTPConnection(httplib.HTTPConnection): | ||
451 | # use the modified response class | ||
452 | response_class = HTTPResponse | ||
453 | |||
454 | class HTTPSConnection(httplib.HTTPSConnection): | ||
455 | response_class = HTTPResponse | ||
456 | |||
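Setting response_class is what wires the buffered HTTPResponse above into
httplib. A sketch of the effect, not from the original module (live
request; host borrowed from the module docstring):

    conn = HTTPConnection('www.python.org')
    conn.request('GET', '/')
    r = conn.getresponse()      # an instance of the HTTPResponse subclass
    print r.status, r.reason
    line = r.readline()         # readline() exists only on the subclass
    conn.close()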
457 | ######################################################################### | ||
458 | ##### TEST FUNCTIONS | ||
459 | ######################################################################### | ||
460 | |||
461 | def error_handler(url): | ||
462 | global HANDLE_ERRORS | ||
463 | orig = HANDLE_ERRORS | ||
464 | keepalive_handler = HTTPHandler() | ||
465 | opener = urllib2.build_opener(keepalive_handler) | ||
466 | urllib2.install_opener(opener) | ||
467 | pos = {0: 'off', 1: 'on'} | ||
468 | for i in (0, 1): | ||
469 | print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i) | ||
470 | HANDLE_ERRORS = i | ||
471 | try: | ||
472 | fo = urllib2.urlopen(url) | ||
473 | foo = fo.read() | ||
474 | fo.close() | ||
475 | try: status, reason = fo.status, fo.reason | ||
476 | except AttributeError: status, reason = None, None | ||
477 | except IOError, e: | ||
478 | print " EXCEPTION: %s" % e | ||
479 | raise | ||
480 | else: | ||
481 | print " status = %s, reason = %s" % (status, reason) | ||
482 | HANDLE_ERRORS = orig | ||
483 | hosts = keepalive_handler.open_connections() | ||
484 | print "open connections:", hosts | ||
485 | keepalive_handler.close_all() | ||
486 | |||
487 | def continuity(url): | ||
488 | import md5 | ||
489 | format = '%25s: %s' | ||
490 | |||
491 | # first fetch the file with the normal http handler | ||
492 | opener = urllib2.build_opener() | ||
493 | urllib2.install_opener(opener) | ||
494 | fo = urllib2.urlopen(url) | ||
495 | foo = fo.read() | ||
496 | fo.close() | ||
497 | m = md5.new(foo) | ||
498 | print format % ('normal urllib', m.hexdigest()) | ||
499 | |||
500 | # now install the keepalive handler and try again | ||
501 | opener = urllib2.build_opener(HTTPHandler()) | ||
502 | urllib2.install_opener(opener) | ||
503 | |||
504 | fo = urllib2.urlopen(url) | ||
505 | foo = fo.read() | ||
506 | fo.close() | ||
507 | m = md5.new(foo) | ||
508 | print format % ('keepalive read', m.hexdigest()) | ||
509 | |||
510 | fo = urllib2.urlopen(url) | ||
511 | foo = '' | ||
512 | while 1: | ||
513 | f = fo.readline() | ||
514 | if f: foo = foo + f | ||
515 | else: break | ||
516 | fo.close() | ||
517 | m = md5.new(foo) | ||
518 | print format % ('keepalive readline', m.hexdigest()) | ||
519 | |||
520 | def comp(N, url): | ||
521 | print ' making %i connections to:\n %s' % (N, url) | ||
522 | |||
523 | sys.stdout.write(' first using the normal urllib handlers') | ||
524 | # first use normal opener | ||
525 | opener = urllib2.build_opener() | ||
526 | urllib2.install_opener(opener) | ||
527 | t1 = fetch(N, url) | ||
528 | print ' TIME: %.3f s' % t1 | ||
529 | |||
530 | sys.stdout.write(' now using the keepalive handler ') | ||
531 | # now install the keepalive handler and try again | ||
532 | opener = urllib2.build_opener(HTTPHandler()) | ||
533 | urllib2.install_opener(opener) | ||
534 | t2 = fetch(N, url) | ||
535 | print ' TIME: %.3f s' % t2 | ||
536 | print ' improvement factor: %.2f' % (t1/t2, ) | ||
537 | |||
538 | def fetch(N, url, delay=0): | ||
539 | import time | ||
540 | lens = [] | ||
541 | starttime = time.time() | ||
542 | for i in range(N): | ||
543 | if delay and i > 0: time.sleep(delay) | ||
544 | fo = urllib2.urlopen(url) | ||
545 | foo = fo.read() | ||
546 | fo.close() | ||
547 | lens.append(len(foo)) | ||
548 | diff = time.time() - starttime | ||
549 | |||
550 | j = 0 | ||
551 | for i in lens[1:]: | ||
552 | j = j + 1 | ||
553 | if not i == lens[0]: | ||
554 | print "WARNING: inconsistent length on read %i: %i" % (j, i) | ||
555 | |||
556 | return diff | ||
557 | |||
558 | def test_timeout(url): | ||
559 | global DEBUG | ||
560 | dbbackup = DEBUG | ||
561 | class FakeLogger: | ||
562 | def debug(self, msg, *args): print msg % args | ||
563 | info = warning = error = debug | ||
564 | DEBUG = FakeLogger() | ||
565 | print " fetching the file to establish a connection" | ||
566 | fo = urllib2.urlopen(url) | ||
567 | data1 = fo.read() | ||
568 | fo.close() | ||
569 | |||
570 | i = 20 | ||
571 | print " waiting %i seconds for the server to close the connection" % i | ||
572 | while i > 0: | ||
573 | sys.stdout.write('\r %2i' % i) | ||
574 | sys.stdout.flush() | ||
575 | time.sleep(1) | ||
576 | i -= 1 | ||
577 | sys.stderr.write('\r') | ||
578 | |||
579 | print " fetching the file a second time" | ||
580 | fo = urllib2.urlopen(url) | ||
581 | data2 = fo.read() | ||
582 | fo.close() | ||
583 | |||
584 | if data1 == data2: | ||
585 | print ' data are identical' | ||
586 | else: | ||
587 | print ' ERROR: DATA DIFFER' | ||
588 | |||
589 | DEBUG = dbbackup | ||
590 | |||
591 | |||
592 | def test(url, N=10): | ||
593 | print "checking error hander (do this on a non-200)" | ||
594 | try: error_handler(url) | ||
595 | except IOError, e: | ||
596 | print "exiting - exception will prevent further tests" | ||
597 | sys.exit() | ||
598 | |||
599 | print "performing continuity test (making sure stuff isn't corrupted)" | ||
600 | continuity(url) | ||
601 | |||
602 | print "performing speed comparison" | ||
603 | comp(N, url) | ||
604 | |||
605 | print "performing dropped-connection check" | ||
606 | test_timeout(url) | ||
607 | |||
608 | if __name__ == '__main__': | ||
609 | import time | ||
610 | import sys | ||
611 | try: | ||
612 | N = int(sys.argv[1]) | ||
613 | url = sys.argv[2] | ||
614 | except: | ||
615 | print "%s <integer> <url>" % sys.argv[0] | ||
616 | else: | ||
617 | test(url, N) | ||
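The self-test above takes a connection count and a URL; assuming the module
is run directly (URL hypothetical):

    python keepalive.py 10 http://example.com/somefile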
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py deleted file mode 100644 index 9664c6b5c5..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py +++ /dev/null | |||
@@ -1,458 +0,0 @@ | |||
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
19 | |||
20 | """Module for downloading files from a pool of mirrors | ||
21 | |||
22 | DESCRIPTION | ||
23 | |||
24 | This module provides support for downloading files from a pool of | ||
25 | mirrors with configurable failover policies. To a large extent, the | ||
26 | failover policy is chosen by using different classes derived from | ||
27 | the main class, MirrorGroup. | ||
28 | |||
29 | Instances of MirrorGroup (and cousins) act very much like URLGrabber | ||
30 | instances in that they have urlread, urlgrab, and urlopen methods. | ||
31 | They can therefore be used in very similar ways. | ||
32 | |||
33 | from urlgrabber.grabber import URLGrabber | ||
34 | from urlgrabber.mirror import MirrorGroup | ||
35 | gr = URLGrabber() | ||
36 | mg = MirrorGroup(gr, ['http://foo.com/some/directory/', | ||
37 | 'http://bar.org/maybe/somewhere/else/', | ||
38 | 'ftp://baz.net/some/other/place/entirely/']) | ||
39 | mg.urlgrab('relative/path.zip') | ||
40 | |||
41 | The assumption is that all mirrors are identical AFTER the base urls | ||
42 | specified, so that any mirror can be used to fetch any file. | ||
43 | |||
44 | FAILOVER | ||
45 | |||
46 | The failover mechanism is designed to be customized by subclassing | ||
47 | from MirrorGroup to change the details of the behavior. In general, | ||
48 | the classes maintain a master mirror list and a "current mirror" | ||
49 | index. When a download is initiated, a copy of this list and index | ||
50 | is created for that download only. The specific failover policy | ||
51 | depends on the class used, and so is documented in the class | ||
52 | documentation. Note that ANY behavior of the class can be | ||
53 | overridden, so any failover policy at all is possible (although | ||
54 | you may need to change the interface in extreme cases). | ||
55 | |||
56 | CUSTOMIZATION | ||
57 | |||
58 | Most customization of a MirrorGroup object is done at instantiation | ||
59 | time (or via subclassing). There are four major types of | ||
60 | customization: | ||
61 | |||
62 | 1) Pass in a custom urlgrabber - The passed in urlgrabber will be | ||
63 | used (by default... see #2) for the grabs, so options to it | ||
64 | apply for the url-fetching | ||
65 | |||
66 | 2) Custom mirror list - Mirror lists can simply be a list of | ||
67 | mirror strings (as shown in the example above) but each can | ||
68 | also be a dict, allowing for more options. For example, the | ||
69 | first mirror in the list above could also have been: | ||
70 | |||
71 | {'mirror': 'http://foo.com/some/directory/', | ||
72 | 'grabber': <a custom grabber to be used for this mirror>, | ||
73 | 'kwargs': { <a dict of arguments passed to the grabber> }} | ||
74 | |||
75 | All mirrors are converted to this format internally. If | ||
76 | 'grabber' is omitted, the default grabber will be used. If | ||
77 | kwargs are omitted, then (duh) they will not be used. | ||
78 | |||
79 | 3) Pass keyword arguments when instantiating the mirror group. | ||
80 | See, for example, the failure_callback argument. | ||
81 | |||
82 | 4) Finally, any kwargs passed in for the specific file (to the | ||
83 | urlgrab method, for example) will be folded in. The options | ||
84 | passed into the grabber's urlXXX methods will override any | ||
85 | options specified in a custom mirror dict. | ||
86 | |||
87 | """ | ||
88 | |||
89 | # $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $ | ||
90 | |||
91 | import random | ||
92 | import thread # needed for locking to make this threadsafe | ||
93 | |||
94 | from grabber import URLGrabError, CallbackObject, DEBUG | ||
95 | |||
96 | try: | ||
97 | from i18n import _ | ||
98 | except ImportError, msg: | ||
99 | def _(st): return st | ||
100 | |||
101 | class GrabRequest: | ||
102 | """This is a dummy class used to hold information about the specific | ||
103 | request. For example, a single file. By maintaining this information | ||
104 | separately, we can accomplish two things: | ||
105 | |||
106 | 1) make it a little easier to be threadsafe | ||
107 | 2) have request-specific parameters | ||
108 | """ | ||
109 | pass | ||
110 | |||
111 | class MirrorGroup: | ||
112 | """Base Mirror class | ||
113 | |||
114 | Instances of this class are built with a grabber object and a list | ||
115 | of mirrors. Then all calls to urlXXX should be passed relative urls. | ||
116 | The requested file will be searched for on the first mirror. If the | ||
117 | grabber raises an exception (possibly after some retries) then that | ||
118 | mirror will be removed from the list, and the next will be attempted. | ||
119 | If all mirrors are exhausted, then an exception will be raised. | ||
120 | |||
121 | MirrorGroup has the following failover policy: | ||
122 | |||
123 | * downloads begin with the first mirror | ||
124 | |||
125 | * by default (see default_action below) a failure (after retries) | ||
126 | causes it to increment the local AND master indices. Also, | ||
127 | the current mirror is removed from the local list (but NOT the | ||
128 | master list - the mirror can potentially be used for other | ||
129 | files) | ||
130 | |||
131 | * if the local list is ever exhausted, a URLGrabError will be | ||
132 | raised (errno=256, no more mirrors) | ||
133 | |||
134 | OPTIONS | ||
135 | |||
136 | In addition to the required arguments "grabber" and "mirrors", | ||
137 | MirrorGroup also takes the following optional arguments: | ||
138 | |||
139 | default_action | ||
140 | |||
141 | A dict that describes the actions to be taken upon failure | ||
142 | (after retries). default_action can contain any of the | ||
143 | following keys (shown here with their default values): | ||
144 | |||
145 | default_action = {'increment': 1, | ||
146 | 'increment_master': 1, | ||
147 | 'remove': 1, | ||
148 | 'remove_master': 0, | ||
149 | 'fail': 0} | ||
150 | |||
151 | In this context, 'increment' means "use the next mirror" and | ||
152 | 'remove' means "never use this mirror again". The two | ||
153 | 'master' values refer to the instance-level mirror list (used | ||
154 | for all files), whereas the non-master values refer to the | ||
155 | current download only. | ||
156 | |||
157 | The 'fail' option will cause immediate failure by re-raising | ||
158 | the exception; no further attempts will be made to get the | ||
159 | current download. | ||
160 | |||
161 | This dict can be set at instantiation time, | ||
162 | mg = MirrorGroup(grabber, mirrors, default_action={'fail':1}) | ||
163 | at method-execution time (only applies to current fetch), | ||
164 | filename = mg.urlgrab(url, default_action={'increment': 0}) | ||
165 | or by returning an action dict from the failure_callback | ||
166 | return {'fail':0} | ||
167 | in increasing precedence. | ||
168 | |||
169 | If all three of these were done, the net result would be: | ||
170 | {'increment': 0, # set in method | ||
171 | 'increment_master': 1, # class default | ||
172 | 'remove': 1, # class default | ||
173 | 'remove_master': 0, # class default | ||
174 | 'fail': 0} # set at instantiation, reset | ||
175 | # from callback | ||
176 | |||
177 | failure_callback | ||
178 | |||
179 | This is a callback that will be called when a mirror "fails", | ||
180 | meaning the grabber raises some URLGrabError. If this is a | ||
181 | tuple, it is interpreted to be of the form (cb, args, kwargs) | ||
182 | where cb is the actual callable object (function, method, | ||
183 | etc). Otherwise, it is assumed to be the callable object | ||
184 | itself. The callback will be passed a grabber.CallbackObject | ||
185 | instance along with args and kwargs (if present). The following | ||
186 | attributes are defined within the instance: | ||
187 | |||
188 | obj.exception = < exception that was raised > | ||
189 | obj.mirror = < the mirror that was tried > | ||
190 | obj.relative_url = < url relative to the mirror > | ||
191 | obj.url = < full url that failed > | ||
192 | # .url is just the combination of .mirror | ||
193 | # and .relative_url | ||
194 | |||
195 | The failure callback can return an action dict, as described | ||
196 | above. | ||
197 | |||
198 | Like default_action, the failure_callback can be set at | ||
199 | instantiation time or when the urlXXX method is called. In | ||
200 | the latter case, it applies only for that fetch. | ||
201 | |||
202 | The callback can re-raise the exception quite easily. For | ||
203 | example, this is a perfectly adequate callback function: | ||
204 | |||
205 | def callback(obj): raise obj.exception | ||
206 | |||
207 | WARNING: do not save the exception object (or the | ||
208 | CallbackObject instance). As they contain stack frame | ||
209 | references, they can lead to circular references. | ||
210 | |||
211 | Notes: | ||
212 | * The behavior can be customized by deriving and overriding the | ||
213 | 'CONFIGURATION METHODS' | ||
214 | * The 'grabber' instance is kept as a reference, not copied. | ||
215 | Therefore, the grabber instance can be modified externally | ||
216 | and changes will take effect immediately. | ||
217 | """ | ||
218 | |||
219 | # notes on thread-safety: | ||
220 | |||
221 | # A GrabRequest should never be shared by multiple threads because | ||
222 | # it's never saved inside the MG object and never returned outside it. | ||
223 | # therefore, it should be safe to access/modify grabrequest data | ||
224 | # without a lock. However, accessing the mirrors and _next attributes | ||
225 | # of the MG itself must be done when locked to prevent (for example) | ||
226 | # removal of the wrong mirror. | ||
227 | |||
228 | ############################################################## | ||
229 | # CONFIGURATION METHODS - intended to be overridden to | ||
230 | # customize behavior | ||
231 | def __init__(self, grabber, mirrors, **kwargs): | ||
232 | """Initialize the MirrorGroup object. | ||
233 | |||
234 | REQUIRED ARGUMENTS | ||
235 | |||
236 | grabber - URLGrabber instance | ||
237 | mirrors - a list of mirrors | ||
238 | |||
239 | OPTIONAL ARGUMENTS | ||
240 | |||
241 | failure_callback - callback to be used when a mirror fails | ||
242 | default_action - dict of failure actions | ||
243 | |||
244 | See the module-level and class level documentation for more | ||
245 | details. | ||
246 | """ | ||
247 | |||
248 | # OVERRIDE IDEAS: | ||
249 | # shuffle the list to randomize order | ||
250 | self.grabber = grabber | ||
251 | self.mirrors = self._parse_mirrors(mirrors) | ||
252 | self._next = 0 | ||
253 | self._lock = thread.allocate_lock() | ||
254 | self.default_action = None | ||
255 | self._process_kwargs(kwargs) | ||
256 | |||
257 | # if these values are found in **kwargs passed to one of the urlXXX | ||
258 | # methods, they will be stripped before getting passed on to the | ||
259 | # grabber | ||
260 | options = ['default_action', 'failure_callback'] | ||
261 | |||
262 | def _process_kwargs(self, kwargs): | ||
263 | self.failure_callback = kwargs.get('failure_callback') | ||
264 | self.default_action = kwargs.get('default_action') | ||
265 | |||
266 | def _parse_mirrors(self, mirrors): | ||
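        # e.g. ['http://a/', {'mirror': 'http://b/'}]
        #  ->  [{'mirror': 'http://a/'}, {'mirror': 'http://b/'}]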
267 | parsed_mirrors = [] | ||
268 | for m in mirrors: | ||
269 | if type(m) == type(''): m = {'mirror': m} | ||
270 | parsed_mirrors.append(m) | ||
271 | return parsed_mirrors | ||
272 | |||
273 | def _load_gr(self, gr): | ||
274 | # OVERRIDE IDEAS: | ||
275 | # shuffle gr list | ||
276 | self._lock.acquire() | ||
277 | gr.mirrors = list(self.mirrors) | ||
278 | gr._next = self._next | ||
279 | self._lock.release() | ||
280 | |||
281 | def _get_mirror(self, gr): | ||
282 | # OVERRIDE IDEAS: | ||
283 | # return a random mirror so that multiple mirrors get used | ||
284 | # even without failures. | ||
285 | if not gr.mirrors: | ||
286 | raise URLGrabError(256, _('No more mirrors to try.')) | ||
287 | return gr.mirrors[gr._next] | ||
288 | |||
289 | def _failure(self, gr, cb_obj): | ||
290 | # OVERRIDE IDEAS: | ||
291 | # inspect the error - remove=1 for 404, remove=2 for connection | ||
292 | # refused, etc. (this can also be done via | ||
293 | # the callback) | ||
294 | cb = gr.kw.get('failure_callback') or self.failure_callback | ||
295 | if cb: | ||
296 | if type(cb) == type( () ): | ||
297 | cb, args, kwargs = cb | ||
298 | else: | ||
299 | args, kwargs = (), {} | ||
300 | action = cb(cb_obj, *args, **kwargs) or {} | ||
301 | else: | ||
302 | action = {} | ||
303 | # XXXX - decide - there are two ways to do this | ||
304 | # the first is action-overriding as a whole - use the entire action | ||
305 | # or fall back on module level defaults | ||
306 | #action = action or gr.kw.get('default_action') or self.default_action | ||
307 | # the other is to fall through for each element in the action dict | ||
308 | a = dict(self.default_action or {}) | ||
309 | a.update(gr.kw.get('default_action', {})) | ||
310 | a.update(action) | ||
311 | action = a | ||
312 | self.increment_mirror(gr, action) | ||
313 | if action and action.get('fail', 0): raise | ||
314 | |||
315 | def increment_mirror(self, gr, action={}): | ||
316 | """Tell the mirror object increment the mirror index | ||
317 | |||
318 | This increments the mirror index, which amounts to telling the | ||
319 | mirror object to use a different mirror (for this and future | ||
320 | downloads). | ||
321 | |||
322 | This is a SEMI-public method. It will be called internally, | ||
323 | and you may never need to call it. However, it is provided | ||
324 | (and is made public) so that the calling program can increment | ||
325 | the mirror choice for methods like urlopen. For example, with | ||
326 | urlopen, there's no good way for the mirror group to know that | ||
327 | an error occurs mid-download (it's already returned and given | ||
328 | you the file object). | ||
329 | |||
330 | remove --- can have several values | ||
331 | 0 do not remove the mirror from the list | ||
332 | 1 remove the mirror for this download only | ||
333 | 2 remove the mirror permanently | ||
334 | |||
335 | beware of remove=0 as it can lead to infinite loops | ||
336 | """ | ||
337 | badmirror = gr.mirrors[gr._next] | ||
338 | |||
339 | self._lock.acquire() | ||
340 | try: | ||
341 | ind = self.mirrors.index(badmirror) | ||
342 | except ValueError: | ||
343 | pass | ||
344 | else: | ||
345 | if action.get('remove_master', 0): | ||
346 | del self.mirrors[ind] | ||
347 | elif self._next == ind and action.get('increment_master', 1): | ||
348 | self._next += 1 | ||
349 | if self._next >= len(self.mirrors): self._next = 0 | ||
350 | self._lock.release() | ||
351 | |||
352 | if action.get('remove', 1): | ||
353 | del gr.mirrors[gr._next] | ||
354 | elif action.get('increment', 1): | ||
355 | gr._next += 1 | ||
356 | if gr._next >= len(gr.mirrors): gr._next = 0 | ||
357 | |||
358 | if DEBUG: | ||
359 | grm = [m['mirror'] for m in gr.mirrors] | ||
360 | DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next) | ||
361 | selfm = [m['mirror'] for m in self.mirrors] | ||
362 | DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next) | ||
363 | |||
364 | ##################################################################### | ||
365 | # NON-CONFIGURATION METHODS | ||
366 | # these methods are designed to be largely workhorse methods that | ||
367 | # are not intended to be overridden. That doesn't mean you can't; | ||
368 | # if you want to, feel free, but most things can be done by | ||
369 | overriding the configuration methods :) | ||
370 | |||
371 | def _join_url(self, base_url, rel_url): | ||
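        # e.g. ('http://foo.com/dir/', 'a.zip') -> 'http://foo.com/dir/a.zip'
        #      ('http://foo.com/dir',  'a.zip') -> 'http://foo.com/dir/a.zip'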
372 | if base_url.endswith('/') or rel_url.startswith('/'): | ||
373 | return base_url + rel_url | ||
374 | else: | ||
375 | return base_url + '/' + rel_url | ||
376 | |||
377 | def _mirror_try(self, func, url, kw): | ||
378 | gr = GrabRequest() | ||
379 | gr.func = func | ||
380 | gr.url = url | ||
381 | gr.kw = dict(kw) | ||
382 | self._load_gr(gr) | ||
383 | |||
384 | for k in self.options: | ||
385 | try: del kw[k] | ||
386 | except KeyError: pass | ||
387 | |||
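        # loop until some mirror succeeds; on each failure _failure()
        # either advances/removes a mirror or raises (errno 256 once the
        # request's local list is exhausted, sooner if 'fail' is set)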
388 | while 1: | ||
389 | mirrorchoice = self._get_mirror(gr) | ||
390 | fullurl = self._join_url(mirrorchoice['mirror'], gr.url) | ||
391 | kwargs = dict(mirrorchoice.get('kwargs', {})) | ||
392 | kwargs.update(kw) | ||
393 | grabber = mirrorchoice.get('grabber') or self.grabber | ||
394 | func_ref = getattr(grabber, func) | ||
395 | if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl) | ||
396 | try: | ||
397 | return func_ref( *(fullurl,), **kwargs ) | ||
398 | except URLGrabError, e: | ||
399 | if DEBUG: DEBUG.info('MIRROR: failed') | ||
400 | obj = CallbackObject() | ||
401 | obj.exception = e | ||
402 | obj.mirror = mirrorchoice['mirror'] | ||
403 | obj.relative_url = gr.url | ||
404 | obj.url = fullurl | ||
405 | self._failure(gr, obj) | ||
406 | |||
407 | def urlgrab(self, url, filename=None, **kwargs): | ||
408 | kw = dict(kwargs) | ||
409 | kw['filename'] = filename | ||
410 | func = 'urlgrab' | ||
411 | return self._mirror_try(func, url, kw) | ||
412 | |||
413 | def urlopen(self, url, **kwargs): | ||
414 | kw = dict(kwargs) | ||
415 | func = 'urlopen' | ||
416 | return self._mirror_try(func, url, kw) | ||
417 | |||
418 | def urlread(self, url, limit=None, **kwargs): | ||
419 | kw = dict(kwargs) | ||
420 | kw['limit'] = limit | ||
421 | func = 'urlread' | ||
422 | return self._mirror_try(func, url, kw) | ||
423 | |||
424 | |||
425 | class MGRandomStart(MirrorGroup): | ||
426 | """A mirror group that starts at a random mirror in the list. | ||
427 | |||
428 | The behavior of this class is identical to MirrorGroup, except that | ||
429 | it starts at a random location in the mirror list. | ||
430 | """ | ||
431 | |||
432 | def __init__(self, grabber, mirrors, **kwargs): | ||
433 | """Initialize the object | ||
434 | |||
435 | The arguments for initialization are the same as for MirrorGroup | ||
436 | """ | ||
437 | MirrorGroup.__init__(self, grabber, mirrors, **kwargs) | ||
438 | self._next = random.randrange(len(mirrors)) | ||
439 | |||
440 | class MGRandomOrder(MirrorGroup): | ||
441 | """A mirror group that uses mirrors in a random order. | ||
442 | |||
443 | The behavior of this class is identical to MirrorGroup, except that | ||
444 | it uses the mirrors in a random order. Note that the order is set at | ||
445 | initialization time and fixed thereafter. That is, it does not pick a | ||
446 | random mirror after each failure. | ||
447 | """ | ||
448 | |||
449 | def __init__(self, grabber, mirrors, **kwargs): | ||
450 | """Initialize the object | ||
451 | |||
452 | The arguments for initialization are the same as for MirrorGroup | ||
453 | """ | ||
454 | MirrorGroup.__init__(self, grabber, mirrors, **kwargs) | ||
455 | random.shuffle(self.mirrors) | ||
456 | |||
457 | if __name__ == '__main__': | ||
458 | pass | ||
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py deleted file mode 100644 index 02db524e76..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/progress.py +++ /dev/null | |||
@@ -1,530 +0,0 @@ | |||
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
19 | |||
20 | # $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $ | ||
21 | |||
22 | import sys | ||
23 | import time | ||
24 | import math | ||
25 | import thread | ||
26 | |||
27 | class BaseMeter: | ||
28 | def __init__(self): | ||
29 | self.update_period = 0.3 # seconds | ||
30 | |||
31 | self.filename = None | ||
32 | self.url = None | ||
33 | self.basename = None | ||
34 | self.text = None | ||
35 | self.size = None | ||
36 | self.start_time = None | ||
37 | self.last_amount_read = 0 | ||
38 | self.last_update_time = None | ||
39 | self.re = RateEstimator() | ||
40 | |||
41 | def start(self, filename=None, url=None, basename=None, | ||
42 | size=None, now=None, text=None): | ||
43 | self.filename = filename | ||
44 | self.url = url | ||
45 | self.basename = basename | ||
46 | self.text = text | ||
47 | |||
48 | #size = None ######### TESTING | ||
49 | self.size = size | ||
50 | if size is not None: self.fsize = format_number(size) + 'B' | ||
51 | |||
52 | if now is None: now = time.time() | ||
53 | self.start_time = now | ||
54 | self.re.start(size, now) | ||
55 | self.last_amount_read = 0 | ||
56 | self.last_update_time = now | ||
57 | self._do_start(now) | ||
58 | |||
59 | def _do_start(self, now=None): | ||
60 | pass | ||
61 | |||
62 | def update(self, amount_read, now=None): | ||
63 | # for a real gui, you probably want to override and put a call | ||
64 | # to your mainloop iteration function here | ||
65 | if now is None: now = time.time() | ||
66 | if not self.last_update_time or \ | ||
67 | (now >= self.last_update_time + self.update_period): | ||
68 | self.re.update(amount_read, now) | ||
69 | self.last_amount_read = amount_read | ||
70 | self.last_update_time = now | ||
71 | self._do_update(amount_read, now) | ||
72 | |||
73 | def _do_update(self, amount_read, now=None): | ||
74 | pass | ||
75 | |||
76 | def end(self, amount_read, now=None): | ||
77 | if now is None: now = time.time() | ||
78 | self.re.update(amount_read, now) | ||
79 | self.last_amount_read = amount_read | ||
80 | self.last_update_time = now | ||
81 | self._do_end(amount_read, now) | ||
82 | |||
83 | def _do_end(self, amount_read, now=None): | ||
84 | pass | ||
85 | |||
86 | class TextMeter(BaseMeter): | ||
87 | def __init__(self, fo=sys.stderr): | ||
88 | BaseMeter.__init__(self) | ||
89 | self.fo = fo | ||
90 | |||
91 | def _do_update(self, amount_read, now=None): | ||
92 | etime = self.re.elapsed_time() | ||
93 | fetime = format_time(etime) | ||
94 | fread = format_number(amount_read) | ||
95 | #self.size = None | ||
96 | if self.text is not None: | ||
97 | text = self.text | ||
98 | else: | ||
99 | text = self.basename | ||
100 | if self.size is None: | ||
101 | out = '\r%-60.60s %5sB %s ' % \ | ||
102 | (text, fread, fetime) | ||
103 | else: | ||
104 | rtime = self.re.remaining_time() | ||
105 | frtime = format_time(rtime) | ||
106 | frac = self.re.fraction_read() | ||
107 | bar = '='*int(25 * frac) | ||
108 | |||
109 | out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \ | ||
110 | (text, frac*100, bar, fread, frtime) | ||
111 | |||
112 | self.fo.write(out) | ||
113 | self.fo.flush() | ||
114 | |||
115 | def _do_end(self, amount_read, now=None): | ||
116 | total_time = format_time(self.re.elapsed_time()) | ||
117 | total_size = format_number(amount_read) | ||
118 | if self.text is not None: | ||
119 | text = self.text | ||
120 | else: | ||
121 | text = self.basename | ||
122 | if self.size is None: | ||
123 | out = '\r%-60.60s %5sB %s ' % \ | ||
124 | (text, total_size, total_time) | ||
125 | else: | ||
126 | bar = '='*25 | ||
127 | out = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % \ | ||
128 | (text, 100, bar, total_size, total_time) | ||
129 | self.fo.write(out + '\n') | ||
130 | self.fo.flush() | ||
131 | |||
132 | text_progress_meter = TextMeter | ||
133 | |||
134 | class MultiFileHelper(BaseMeter): | ||
135 | def __init__(self, master): | ||
136 | BaseMeter.__init__(self) | ||
137 | self.master = master | ||
138 | |||
139 | def _do_start(self, now): | ||
140 | self.master.start_meter(self, now) | ||
141 | |||
142 | def _do_update(self, amount_read, now): | ||
143 | # elapsed time since last update | ||
144 | self.master.update_meter(self, now) | ||
145 | |||
146 | def _do_end(self, amount_read, now): | ||
147 | self.ftotal_time = format_time(now - self.start_time) | ||
148 | self.ftotal_size = format_number(self.last_amount_read) | ||
149 | self.master.end_meter(self, now) | ||
150 | |||
151 | def failure(self, message, now=None): | ||
152 | self.master.failure_meter(self, message, now) | ||
153 | |||
154 | def message(self, message): | ||
155 | self.master.message_meter(self, message) | ||
156 | |||
157 | class MultiFileMeter: | ||
158 | helperclass = MultiFileHelper | ||
159 | def __init__(self): | ||
160 | self.meters = [] | ||
161 | self.in_progress_meters = [] | ||
162 | self._lock = thread.allocate_lock() | ||
163 | self.update_period = 0.3 # seconds | ||
164 | |||
165 | self.numfiles = None | ||
166 | self.finished_files = 0 | ||
167 | self.failed_files = 0 | ||
168 | self.open_files = 0 | ||
169 | self.total_size = None | ||
170 | self.failed_size = 0 | ||
171 | self.start_time = None | ||
172 | self.finished_file_size = 0 | ||
173 | self.last_update_time = None | ||
174 | self.re = RateEstimator() | ||
175 | |||
176 | def start(self, numfiles=None, total_size=None, now=None): | ||
177 | if now is None: now = time.time() | ||
178 | self.numfiles = numfiles | ||
179 | self.finished_files = 0 | ||
180 | self.failed_files = 0 | ||
181 | self.open_files = 0 | ||
182 | self.total_size = total_size | ||
183 | self.failed_size = 0 | ||
184 | self.start_time = now | ||
185 | self.finished_file_size = 0 | ||
186 | self.last_update_time = now | ||
187 | self.re.start(total_size, now) | ||
188 | self._do_start(now) | ||
189 | |||
190 | def _do_start(self, now): | ||
191 | pass | ||
192 | |||
193 | def end(self, now=None): | ||
194 | if now is None: now = time.time() | ||
195 | self._do_end(now) | ||
196 | |||
197 | def _do_end(self, now): | ||
198 | pass | ||
199 | |||
200 | def lock(self): self._lock.acquire() | ||
201 | def unlock(self): self._lock.release() | ||
202 | |||
203 | ########################################################### | ||
204 | # child meter creation and destruction | ||
205 | def newMeter(self): | ||
206 | newmeter = self.helperclass(self) | ||
207 | self.meters.append(newmeter) | ||
208 | return newmeter | ||
209 | |||
210 | def removeMeter(self, meter): | ||
211 | self.meters.remove(meter) | ||
212 | |||
213 | ########################################################### | ||
214 | # child functions - these should only be called by helpers | ||
215 | def start_meter(self, meter, now): | ||
216 | if not meter in self.meters: | ||
217 | raise ValueError('attempt to use orphaned meter') | ||
218 | self._lock.acquire() | ||
219 | try: | ||
220 | if not meter in self.in_progress_meters: | ||
221 | self.in_progress_meters.append(meter) | ||
222 | self.open_files += 1 | ||
223 | finally: | ||
224 | self._lock.release() | ||
225 | self._do_start_meter(meter, now) | ||
226 | |||
227 | def _do_start_meter(self, meter, now): | ||
228 | pass | ||
229 | |||
230 | def update_meter(self, meter, now): | ||
231 | if not meter in self.meters: | ||
232 | raise ValueError('attempt to use orphaned meter') | ||
233 | if not self.last_update_time or \ | ||
234 | (now >= self.last_update_time + self.update_period): | ||
235 | self.re.update(self._amount_read(), now) | ||
236 | self.last_update_time = now | ||
237 | self._do_update_meter(meter, now) | ||
238 | |||
239 | def _do_update_meter(self, meter, now): | ||
240 | pass | ||
241 | |||
242 | def end_meter(self, meter, now): | ||
243 | if not meter in self.meters: | ||
244 | raise ValueError('attempt to use orphaned meter') | ||
245 | self._lock.acquire() | ||
246 | try: | ||
247 | try: self.in_progress_meters.remove(meter) | ||
248 | except ValueError: pass | ||
249 | self.open_files -= 1 | ||
250 | self.finished_files += 1 | ||
251 | self.finished_file_size += meter.last_amount_read | ||
252 | finally: | ||
253 | self._lock.release() | ||
254 | self._do_end_meter(meter, now) | ||
255 | |||
256 | def _do_end_meter(self, meter, now): | ||
257 | pass | ||
258 | |||
259 | def failure_meter(self, meter, message, now): | ||
260 | if not meter in self.meters: | ||
261 | raise ValueError('attempt to use orphaned meter') | ||
262 | self._lock.acquire() | ||
263 | try: | ||
264 | try: self.in_progress_meters.remove(meter) | ||
265 | except ValueError: pass | ||
266 | self.open_files -= 1 | ||
267 | self.failed_files += 1 | ||
268 | if meter.size and self.failed_size is not None: | ||
269 | self.failed_size += meter.size | ||
270 | else: | ||
271 | self.failed_size = None | ||
272 | finally: | ||
273 | self._lock.release() | ||
274 | self._do_failure_meter(meter, message, now) | ||
275 | |||
276 | def _do_failure_meter(self, meter, message, now): | ||
277 | pass | ||
278 | |||
279 | def message_meter(self, meter, message): | ||
280 | pass | ||
281 | |||
282 | ######################################################## | ||
283 | # internal functions | ||
284 | def _amount_read(self): | ||
285 | tot = self.finished_file_size | ||
286 | for m in self.in_progress_meters: | ||
287 | tot += m.last_amount_read | ||
288 | return tot | ||
289 | |||
290 | |||
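# A rough usage sketch of the multi-file flow (call order inferred from
# the helper/master methods above; names and sizes are illustrative):
#
#     mfm = TextMultiFileMeter()
#     mfm.start(numfiles=2, total_size=3072)
#     m = mfm.newMeter()
#     m.start(basename='file1.bin', size=1024)
#     m.update(512)
#     m.end(1024)
#     mfm.end()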
291 | class TextMultiFileMeter(MultiFileMeter): | ||
292 | def __init__(self, fo=sys.stderr): | ||
293 | self.fo = fo | ||
294 | MultiFileMeter.__init__(self) | ||
295 | |||
296 | # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:## | ||
297 | def _do_update_meter(self, meter, now): | ||
298 | self._lock.acquire() | ||
299 | try: | ||
300 | format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \ | ||
301 | "time: %8.8s/%8.8s" | ||
302 | df = self.finished_files | ||
303 | tf = self.numfiles or 1 | ||
304 | pf = 100 * float(df)/tf + 0.49 | ||
305 | dd = self.re.last_amount_read | ||
306 | td = self.total_size | ||
307 | pd = 100 * (self.re.fraction_read() or 0) + 0.49 | ||
308 | dt = self.re.elapsed_time() | ||
309 | rt = self.re.remaining_time() | ||
310 | if rt is None: tt = None | ||
311 | else: tt = dt + rt | ||
312 | |||
313 | fdd = format_number(dd) + 'B' | ||
314 | ftd = format_number(td) + 'B' | ||
315 | fdt = format_time(dt, 1) | ||
316 | ftt = format_time(tt, 1) | ||
317 | |||
318 | out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt)) | ||
319 | self.fo.write('\r' + out) | ||
320 | self.fo.flush() | ||
321 | finally: | ||
322 | self._lock.release() | ||
323 | |||
324 | def _do_end_meter(self, meter, now): | ||
325 | self._lock.acquire() | ||
326 | try: | ||
327 | format = "%-30.30s %6.6s %8.8s %9.9s" | ||
328 | fn = meter.basename | ||
329 | size = meter.last_amount_read | ||
330 | fsize = format_number(size) + 'B' | ||
331 | et = meter.re.elapsed_time() | ||
332 | fet = format_time(et, 1) | ||
333 | frate = format_number(size / et) + 'B/s' | ||
334 | |||
335 | out = '%-79.79s' % (format % (fn, fsize, fet, frate)) | ||
336 | self.fo.write('\r' + out + '\n') | ||
337 | finally: | ||
338 | self._lock.release() | ||
339 | self._do_update_meter(meter, now) | ||
340 | |||
341 | def _do_failure_meter(self, meter, message, now): | ||
342 | self._lock.acquire() | ||
343 | try: | ||
344 | format = "%-30.30s %6.6s %s" | ||
345 | fn = meter.basename | ||
346 | if type(message) in (type(''), type(u'')): | ||
347 | message = message.splitlines() | ||
348 | if not message: message = [''] | ||
349 | out = '%-79s' % (format % (fn, 'FAILED', message[0] or '')) | ||
350 | self.fo.write('\r' + out + '\n') | ||
351 | for m in message[1:]: self.fo.write(' ' + m + '\n') | ||
352 | finally: | ||
353 | self._lock.release() | ||
354 | self._do_update_meter(meter, now) | ||
355 | |||
356 | def message_meter(self, meter, message): | ||
357 | self._lock.acquire() | ||
358 | try: | ||
359 | pass | ||
360 | finally: | ||
361 | self._lock.release() | ||
362 | |||
363 | def _do_end(self, now): | ||
364 | self._do_update_meter(None, now) | ||
365 | self._lock.acquire() | ||
366 | try: | ||
367 | self.fo.write('\n') | ||
368 | self.fo.flush() | ||
369 | finally: | ||
370 | self._lock.release() | ||
371 | |||
372 | ###################################################################### | ||
373 | # support classes and functions | ||
374 | |||
375 | class RateEstimator: | ||
376 | def __init__(self, timescale=5.0): | ||
377 | self.timescale = timescale | ||
378 | |||
379 | def start(self, total=None, now=None): | ||
380 | if now is None: now = time.time() | ||
381 | self.total = total | ||
382 | self.start_time = now | ||
383 | self.last_update_time = now | ||
384 | self.last_amount_read = 0 | ||
385 | self.ave_rate = None | ||
386 | |||
387 | def update(self, amount_read, now=None): | ||
388 | if now is None: now = time.time() | ||
389 | if amount_read == 0: | ||
390 | # if we just started this file, all bets are off | ||
391 | self.last_update_time = now | ||
392 | self.last_amount_read = 0 | ||
393 | self.ave_rate = None | ||
394 | return | ||
395 | |||
396 | #print 'times', now, self.last_update_time | ||
397 | time_diff = now - self.last_update_time | ||
398 | read_diff = amount_read - self.last_amount_read | ||
399 | self.last_update_time = now | ||
400 | self.last_amount_read = amount_read | ||
401 | self.ave_rate = self._temporal_rolling_ave(\ | ||
402 | time_diff, read_diff, self.ave_rate, self.timescale) | ||
403 | #print 'results', time_diff, read_diff, self.ave_rate | ||
404 | |||
405 | ##################################################################### | ||
406 | # result methods | ||
407 | def average_rate(self): | ||
408 | "get the average transfer rate (in bytes/second)" | ||
409 | return self.ave_rate | ||
410 | |||
411 | def elapsed_time(self): | ||
412 | "the time between the start of the transfer and the most recent update" | ||
413 | return self.last_update_time - self.start_time | ||
414 | |||
415 | def remaining_time(self): | ||
416 | "estimated time remaining" | ||
417 | if not self.ave_rate or not self.total: return None | ||
418 | return (self.total - self.last_amount_read) / self.ave_rate | ||
419 | |||
420 | def fraction_read(self): | ||
421 | """the fraction of the data that has been read | ||
422 | (can be None for unknown transfer size)""" | ||
423 | if self.total is None: return None | ||
424 | elif self.total == 0: return 1.0 | ||
425 | else: return float(self.last_amount_read)/self.total | ||
426 | |||
427 | ######################################################################### | ||
428 | # support methods | ||
429 | def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale): | ||
430 | """a temporal rolling average performs smooth averaging even when | ||
431 | updates come at irregular intervals. This is performed by scaling | ||
432 | the "epsilon" according to the time since the last update. | ||
433 | Specifically, epsilon = time_diff / timescale | ||
434 | |||
435 | As a general rule, the average will take on a completely new value | ||
436 | after 'timescale' seconds.""" | ||
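        # worked example: with timescale=5.0, an update arriving 1.0s
        # after the previous one gives epsilon = 1.0/5.0 = 0.2, so the
        # new average is 0.2*recent_rate + 0.8*last_ave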
437 | epsilon = time_diff / timescale | ||
438 | if epsilon > 1: epsilon = 1.0 | ||
439 | return self._rolling_ave(time_diff, read_diff, last_ave, epsilon) | ||
440 | |||
441 | def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon): | ||
442 | """perform a "rolling average" iteration | ||
443 | a rolling average "folds" new data into an existing average with | ||
444 | some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive) | ||
445 | a value of 0.0 means only the old value (initial value) counts, | ||
446 | and a value of 1.0 means only the newest value is considered.""" | ||
447 | |||
448 | try: | ||
449 | recent_rate = read_diff / time_diff | ||
450 | except ZeroDivisionError: | ||
451 | recent_rate = None | ||
452 | if last_ave is None: return recent_rate | ||
453 | elif recent_rate is None: return last_ave | ||
454 | |||
455 | # at this point, both last_ave and recent_rate are numbers | ||
456 | return epsilon * recent_rate + (1 - epsilon) * last_ave | ||
457 | |||
458 | def _round_remaining_time(self, rt, start_time=15.0): | ||
459 | """round the remaining time, depending on its size | ||
460 | If rt is between n*start_time and (n+1)*start_time round downward | ||
461 | to the nearest multiple of n (for any counting number n). | ||
462 | If rt < start_time, round down to the nearest 1. | ||
463 | For example (for start_time = 15.0): | ||
464 | 2.7 -> 2.0 | ||
465 | 25.2 -> 25.0 | ||
466 | 26.4 -> 26.0 | ||
467 | 35.3 -> 34.0 | ||
468 | 63.6 -> 60.0 | ||
469 | """ | ||
470 | |||
471 | if rt <= 0: return 0.0 | ||
472 | shift = int(math.log(rt/start_time)/math.log(2)) | ||
473 | rt = int(rt) | ||
474 | if shift <= 0: return rt | ||
475 | return float(int(rt) >> shift << shift) | ||
476 | |||
477 | |||
478 | def format_time(seconds, use_hours=0): | ||
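    # e.g. format_time(75) -> '01:15'; format_time(3675, 1) -> '01:01:15'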
479 | if seconds is None or seconds < 0: | ||
480 | if use_hours: return '--:--:--' | ||
481 | else: return '--:--' | ||
482 | else: | ||
483 | seconds = int(seconds) | ||
484 | minutes = seconds / 60 | ||
485 | seconds = seconds % 60 | ||
486 | if use_hours: | ||
487 | hours = minutes / 60 | ||
488 | minutes = minutes % 60 | ||
489 | return '%02i:%02i:%02i' % (hours, minutes, seconds) | ||
490 | else: | ||
491 | return '%02i:%02i' % (minutes, seconds) | ||
492 | |||
493 | def format_number(number, SI=0, space=' '): | ||
494 | """Turn numbers into human-readable metric-like numbers""" | ||
495 | symbols = ['', # (none) | ||
496 | 'k', # kilo | ||
497 | 'M', # mega | ||
498 | 'G', # giga | ||
499 | 'T', # tera | ||
500 | 'P', # peta | ||
501 | 'E', # exa | ||
502 | 'Z', # zetta | ||
503 | 'Y'] # yotta | ||
504 | |||
505 | if SI: step = 1000.0 | ||
506 | else: step = 1024.0 | ||
507 | |||
508 | thresh = 999 | ||
509 | depth = 0 | ||
510 | max_depth = len(symbols) - 1 | ||
511 | |||
512 | # we want numbers between 0 and thresh, but don't exceed the length | ||
513 | # of our list. In that event, the formatting will be screwed up, | ||
514 | # but it'll still show the right number. | ||
515 | while number > thresh and depth < max_depth: | ||
516 | depth = depth + 1 | ||
517 | number = number / step | ||
518 | |||
519 | if type(number) == type(1) or type(number) == type(1L): | ||
520 | # it's an int or a long, which means it didn't get divided, | ||
521 | # which means it's already short enough | ||
522 | format = '%i%s%s' | ||
523 | elif number < 9.95: | ||
524 | # must use 9.95 for proper sizing. For example, 9.99 will be | ||
525 | # rounded to 10.0 with the .1f format string (which is too long) | ||
526 | format = '%.1f%s%s' | ||
527 | else: | ||
528 | format = '%.0f%s%s' | ||
529 | |||
530 | return(format % (float(number or 0), space, symbols[depth])) | ||
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py deleted file mode 100644 index 07848dac7c..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/sslfactory.py +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | |||
19 | import httplib | ||
20 | import urllib2 | ||
21 | |||
22 | try: | ||
23 | from M2Crypto import SSL | ||
24 | from M2Crypto import httpslib | ||
25 | from M2Crypto import m2urllib2 | ||
26 | |||
27 | SSL.Connection.clientPostConnectionCheck = None | ||
28 | have_m2crypto = True | ||
29 | except ImportError: | ||
30 | have_m2crypto = False | ||
31 | |||
32 | DEBUG = None | ||
33 | |||
34 | if have_m2crypto: | ||
35 | |||
36 | class M2SSLFactory: | ||
37 | |||
38 | def __init__(self, ssl_ca_cert, ssl_context): | ||
39 | self.ssl_context = self._get_ssl_context(ssl_ca_cert, ssl_context) | ||
40 | |||
41 | def _get_ssl_context(self, ssl_ca_cert, ssl_context): | ||
42 | """ | ||
43 | Create an ssl context using the CA cert file or ssl context. | ||
44 | |||
45 | The CA cert is used first if it was passed as an option. If not, | ||
46 | then the supplied ssl context is used. If no ssl context was supplied, | ||
47 | None is returned. | ||
48 | """ | ||
49 | if ssl_ca_cert: | ||
50 | context = SSL.Context() | ||
51 | context.load_verify_locations(ssl_ca_cert) | ||
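            # note: verify_none below means the peer certificate is not
            # actually checked against the CA loaded above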
52 | context.set_verify(SSL.verify_none, -1) | ||
53 | return context | ||
54 | else: | ||
55 | return ssl_context | ||
56 | |||
57 | def create_https_connection(self, host, response_class = None): | ||
58 | connection = httplib.HTTPSConnection(host, self.ssl_context) | ||
59 | if response_class: | ||
60 | connection.response_class = response_class | ||
61 | return connection | ||
62 | |||
63 | def create_opener(self, *handlers): | ||
64 | return m2urllib2.build_opener(self.ssl_context, *handlers) | ||
65 | |||
66 | |||
67 | class SSLFactory: | ||
68 | |||
69 | def create_https_connection(self, host, response_class = None): | ||
70 | connection = httplib.HTTPSConnection(host) | ||
71 | if response_class: | ||
72 | connection.response_class = response_class | ||
73 | return connection | ||
74 | |||
75 | def create_opener(self, *handlers): | ||
76 | return urllib2.build_opener(*handlers) | ||
77 | |||
78 | |||
79 | |||
80 | def get_factory(ssl_ca_cert = None, ssl_context = None): | ||
81 | """ Return an SSLFactory, based on if M2Crypto is available. """ | ||
82 | if have_m2crypto: | ||
83 | return M2SSLFactory(ssl_ca_cert, ssl_context) | ||
84 | else: | ||
85 | # warn if someone provides the args but we can't use them | ||
86 | if ssl_ca_cert or ssl_context: | ||
87 | if DEBUG: | ||
88 | DEBUG.warning("SSL arguments supplied, but M2Crypto is not available. " | ||
89 | "Using Python SSL.") | ||
90 | return SSLFactory() | ||
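
# A minimal usage sketch (the host and CA path are illustrative):
#
#     factory = get_factory(ssl_ca_cert='/etc/pki/tls/ca.pem')
#     conn = factory.create_https_connection('mirror.example.com')
#     opener = factory.create_opener()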