diff options
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py')
-rw-r--r-- | scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py | 458 |
1 files changed, 0 insertions, 458 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py deleted file mode 100644 index 9664c6b5c5..0000000000 --- a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py +++ /dev/null | |||
@@ -1,458 +0,0 @@ | |||
1 | # This library is free software; you can redistribute it and/or | ||
2 | # modify it under the terms of the GNU Lesser General Public | ||
3 | # License as published by the Free Software Foundation; either | ||
4 | # version 2.1 of the License, or (at your option) any later version. | ||
5 | # | ||
6 | # This library is distributed in the hope that it will be useful, | ||
7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
9 | # Lesser General Public License for more details. | ||
10 | # | ||
11 | # You should have received a copy of the GNU Lesser General Public | ||
12 | # License along with this library; if not, write to the | ||
13 | # Free Software Foundation, Inc., | ||
14 | # 59 Temple Place, Suite 330, | ||
15 | # Boston, MA 02111-1307 USA | ||
16 | |||
17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
19 | |||
20 | """Module for downloading files from a pool of mirrors | ||
21 | |||
22 | DESCRIPTION | ||
23 | |||
24 | This module provides support for downloading files from a pool of | ||
25 | mirrors with configurable failover policies. To a large extent, the | ||
26 | failover policy is chosen by using different classes derived from | ||
27 | the main class, MirrorGroup. | ||
28 | |||
29 | Instances of MirrorGroup (and cousins) act very much like URLGrabber | ||
30 | instances in that they have urlread, urlgrab, and urlopen methods. | ||
31 | They can, therefore, be used in very similar ways. | |
32 | |||
33 | from urlgrabber.grabber import URLGrabber | ||
34 | from urlgrabber.mirror import MirrorGroup | ||
35 | gr = URLGrabber() | ||
36 | mg = MirrorGroup(gr, ['http://foo.com/some/directory/', | ||
37 | 'http://bar.org/maybe/somewhere/else/', | ||
38 | 'ftp://baz.net/some/other/place/entirely/']) | |
39 | mg.urlgrab('relative/path.zip') | ||
40 | |||
41 | The assumption is that all mirrors are identical AFTER the base urls | ||
42 | specified, so that any mirror can be used to fetch any file. | ||
43 | |||
44 | FAILOVER | ||
45 | |||
46 | The failover mechanism is designed to be customized by subclassing | ||
47 | from MirrorGroup to change the details of the behavior. In general, | ||
48 | the classes maintain a master mirror list and a "current mirror" | ||
49 | index. When a download is initiated, a copy of this list and index | ||
50 | is created for that download only. The specific failover policy | ||
51 | depends on the class used, and so is documented in the class | ||
52 | documentation. Note that ANY behavior of the class can be | ||
53 | overridden, so any failover policy at all is possible (although | ||
54 | you may need to change the interface in extreme cases). | ||
55 | |||
56 | CUSTOMIZATION | ||
57 | |||
58 | Most customization of a MirrorGroup object is done at instantiation | ||
59 | time (or via subclassing). There are four major types of | ||
60 | customization: | ||
61 | |||
62 | 1) Pass in a custom urlgrabber - The passed in urlgrabber will be | ||
63 | used (by default... see #2) for the grabs, so options to it | ||
64 | apply for the url-fetching | ||
65 | |||
66 | 2) Custom mirror list - Mirror lists can simply be a list of | ||
67 | string mirrors (as shown in the example above) but each can | |
68 | also be a dict, allowing for more options. For example, the | ||
69 | first mirror in the list above could also have been: | ||
70 | |||
71 | {'mirror': 'http://foo.com/some/directory/', | ||
72 | 'grabber': <a custom grabber to be used for this mirror>, | ||
73 | 'kwargs': { <a dict of arguments passed to the grabber> }} | ||
74 | |||
75 | All mirrors are converted to this format internally. If | ||
76 | 'grabber' is omitted, the default grabber will be used. If | ||
77 | kwargs are omitted, then (duh) they will not be used. | ||
78 | |||
79 | 3) Pass keyword arguments when instantiating the mirror group. | ||
80 | See, for example, the failure_callback argument. | ||
81 | |||
82 | 4) Finally, any kwargs passed in for the specific file (to the | ||
83 | urlgrab method, for example) will be folded in. The options | ||
84 | passed into the grabber's urlXXX methods will override any | ||
85 | options specified in a custom mirror dict. | ||
86 | |||
87 | """ | ||
88 | |||
89 | # $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $ | ||
90 | |||
91 | import random | ||
92 | import thread # needed for locking to make this threadsafe | ||
93 | |||
94 | from grabber import URLGrabError, CallbackObject, DEBUG | ||
95 | |||
try:
    from i18n import _
except ImportError:
    # No translation support available: fall back to an identity function
    # so that messages wrapped in _() are emitted untranslated.
    def _(st):
        """Return the message unchanged (no-op translation fallback)."""
        return st
100 | |||
class GrabRequest:
    """Plain attribute container describing one specific request.

    A request is, for example, the download of a single file.  Keeping
    the per-request state in its own object (instead of on the mirror
    group) serves two purposes:

      1) it makes thread-safety a little easier to achieve
      2) it allows request-specific parameters
    """
110 | |||
class MirrorGroup:
    """Base Mirror class

    Instances of this class are built with a grabber object and a list
    of mirrors.  Then all calls to urlXXX should be passed relative urls.
    The requested file will be searched for on the first mirror.  If the
    grabber raises an exception (possibly after some retries) then that
    mirror will be removed from the list, and the next will be attempted.
    If all mirrors are exhausted, then an exception will be raised.

    MirrorGroup has the following failover policy:

      * downloads begin with the first mirror

      * by default (see default_action below) a failure (after retries)
        causes it to increment the local AND master indices.  Also,
        the current mirror is removed from the local list (but NOT the
        master list - the mirror can potentially be used for other
        files)

      * if the local list is ever exhausted, a URLGrabError will be
        raised (errno=256, no more mirrors)

    OPTIONS

      In addition to the required arguments "grabber" and "mirrors",
      MirrorGroup also takes the following optional arguments:

      default_action

        A dict that describes the actions to be taken upon failure
        (after retries).  default_action can contain any of the
        following keys (shown here with their default values):

          default_action = {'increment': 1,
                            'increment_master': 1,
                            'remove': 1,
                            'remove_master': 0,
                            'fail': 0}

        In this context, 'increment' means "use the next mirror" and
        'remove' means "never use this mirror again".  The two
        'master' values refer to the instance-level mirror list (used
        for all files), whereas the non-master values refer to the
        current download only.

        The 'fail' option will cause immediate failure by re-raising
        the exception and no further attempts to get the current
        download.

        This dict can be set at instantiation time,
          mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
        at method-execution time (only applies to current fetch),
          filename = mg.urlgrab(url, default_action={'increment': 0})
        or by returning an action dict from the failure_callback
          return {'fail':0}
        in increasing precedence.

        If all three of these were done, the net result would be:
              {'increment': 0,         # set in method
               'increment_master': 1,  # class default
               'remove': 1,            # class default
               'remove_master': 0,     # class default
               'fail': 0}              # set at instantiation, reset
                                       # from callback

      failure_callback

        this is a callback that will be called when a mirror "fails",
        meaning the grabber raises some URLGrabError.  If this is a
        tuple, it is interpreted to be of the form (cb, args, kwargs)
        where cb is the actual callable object (function, method,
        etc).  Otherwise, it is assumed to be the callable object
        itself.  The callback will be passed a grabber.CallbackObject
        instance along with args and kwargs (if present).  The following
        attributes are defined within the instance:

           obj.exception    = < exception that was raised >
           obj.mirror       = < the mirror that was tried >
           obj.relative_url = < url relative to the mirror >
           obj.url          = < full url that failed >
                              # .url is just the combination of .mirror
                              # and .relative_url

        The failure callback can return an action dict, as described
        above.

        Like default_action, the failure_callback can be set at
        instantiation time or when the urlXXX method is called.  In
        the latter case, it applies only for that fetch.

        The callback can re-raise the exception quite easily.  For
        example, this is a perfectly adequate callback function:

          def callback(obj): raise obj.exception

        WARNING: do not save the exception object (or the
        CallbackObject instance).  As they contain stack frame
        references, they can lead to circular references.

    Notes:
      * The behavior can be customized by deriving and overriding the
        'CONFIGURATION METHODS'
      * The 'grabber' instance is kept as a reference, not copied.
        Therefore, the grabber instance can be modified externally
        and changes will take effect immediately.
    """

    # notes on thread-safety:

    #   A GrabRequest should never be shared by multiple threads because
    #   it's never saved inside the MG object and never returned outside it.
    #   therefore, it should be safe to access/modify grabrequest data
    #   without a lock.  However, accessing the mirrors and _next attributes
    #   of the MG itself must be done when locked to prevent (for example)
    #   removal of the wrong mirror.

    ##############################################################
    #  CONFIGURATION METHODS  -  intended to be overridden to
    #                            customize behavior
    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the MirrorGroup object.

        REQUIRED ARGUMENTS

          grabber  - URLGrabber instance
          mirrors  - a list of mirrors

        OPTIONAL ARGUMENTS

          failure_callback  - callback to be used when a mirror fails
          default_action    - dict of failure actions

        See the module-level and class level documentation for more
        details.
        """

        # OVERRIDE IDEAS:
        #   shuffle the list to randomize order
        self.grabber = grabber
        self.mirrors = self._parse_mirrors(mirrors)
        self._next = 0
        self._lock = thread.allocate_lock()
        self.default_action = None
        self._process_kwargs(kwargs)

    # if these values are found in **kwargs passed to one of the urlXXX
    # methods, they will be stripped before getting passed on to the
    # grabber
    options = ['default_action', 'failure_callback']

    def _process_kwargs(self, kwargs):
        """Store the instance-level failure options found in kwargs.

        Missing keys leave the corresponding attribute set to None.
        """
        self.failure_callback = kwargs.get('failure_callback')
        self.default_action = kwargs.get('default_action')

    def _parse_mirrors(self, mirrors):
        """Normalize a mirror list to the internal list-of-dicts form.

        String entries are wrapped as {'mirror': <string>}; every other
        entry is appended unchanged (and not copied).
        """
        try:
            string_types = basestring  # Python 2: str and unicode
        except NameError:
            string_types = str         # Python 3
        parsed_mirrors = []
        for m in mirrors:
            if isinstance(m, string_types):
                m = {'mirror': m}
            parsed_mirrors.append(m)
        return parsed_mirrors

    def _load_gr(self, gr):
        """Give the request its own snapshot of the mirror list and index."""
        # OVERRIDE IDEAS:
        #   shuffle gr list
        with self._lock:
            gr.mirrors = list(self.mirrors)
            gr._next = self._next

    def _get_mirror(self, gr):
        """Return the mirror dict the request should try next.

        Raises URLGrabError (errno 256) when the request's local mirror
        list is empty.
        """
        # OVERRIDE IDEAS:
        #   return a random mirror so that multiple mirrors get used
        #   even without failures.
        if not gr.mirrors:
            raise URLGrabError(256, _('No more mirrors to try.'))
        return gr.mirrors[gr._next]

    def _failure(self, gr, cb_obj):
        """Handle a failed attempt: run the callback, then apply the action.

        The effective action dict is built key by key, in increasing
        precedence, from the instance default_action, the per-call
        default_action and the dict returned by the failure callback.

        This method is meant to be called from inside the 'except' block
        that caught the grabber's exception: when the resulting action
        has a true 'fail' value, the exception currently being handled
        is re-raised with a bare 'raise'.
        """
        # OVERRIDE IDEAS:
        #   inspect the error - remove=1 for 404, remove=2 for connection
        #                       refused, etc. (this can also be done via
        #                       the callback)
        cb = gr.kw.get('failure_callback') or self.failure_callback
        if cb:
            if isinstance(cb, tuple):
                cb, args, kwargs = cb
            else:
                args, kwargs = (), {}
            action = cb(cb_obj, *args, **kwargs) or {}
        else:
            action = {}
        # XXXX - decide - there are two ways to do this
        # the first is action-overriding as a whole - use the entire action
        # or fall back on module level defaults
        #action = action or gr.kw.get('default_action') or self.default_action
        # the other is to fall through for each element in the action dict
        a = dict(self.default_action or {})
        # 'or {}' guards against an explicit default_action=None per call
        a.update(gr.kw.get('default_action') or {})
        a.update(action)
        action = a
        self.increment_mirror(gr, action)
        if action.get('fail', 0):
            raise

    def increment_mirror(self, gr, action=None):
        """Tell the mirror object increment the mirror index

        This increments the mirror index, which amounts to telling the
        mirror object to use a different mirror (for this and future
        downloads).

        This is a SEMI-public method.  It will be called internally,
        and you may never need to call it.  However, it is provided
        (and is made public) so that the calling program can increment
        the mirror choice for methods like urlopen.  For example, with
        urlopen, there's no good way for the mirror group to know that
        an error occurs mid-download (it's already returned and given
        you the file object).

        action is a dict (None or omitted means "all defaults") whose
        recognized keys and defaults are:

          remove_master    (0)  delete the failed mirror from the master list
          increment_master (1)  advance the master index, but only if it
                                currently points at the failed mirror and
                                the mirror was not removed from the master
          remove           (1)  delete the failed mirror from this request
          increment        (1)  advance this request's index (only
                                consulted when 'remove' is false)

        beware of remove=0 as it can lead to infinite loops
        """
        if action is None:
            action = {}
        badmirror = gr.mirrors[gr._next]

        with self._lock:
            try:
                ind = self.mirrors.index(badmirror)
            except ValueError:
                # the mirror is no longer in the master list
                pass
            else:
                if action.get('remove_master', 0):
                    del self.mirrors[ind]
                elif self._next == ind and action.get('increment_master', 1):
                    self._next += 1
                # wrap around after a removal or an increment
                if self._next >= len(self.mirrors):
                    self._next = 0

        if action.get('remove', 1):
            del gr.mirrors[gr._next]
        elif action.get('increment', 1):
            gr._next += 1
        # wrap around after a removal or an increment
        if gr._next >= len(gr.mirrors):
            gr._next = 0

        if DEBUG:
            grm = [m['mirror'] for m in gr.mirrors]
            DEBUG.info('GR   mirrors: [%s] %i', ' '.join(grm), gr._next)
            selfm = [m['mirror'] for m in self.mirrors]
            DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)

    #####################################################################
    # NON-CONFIGURATION METHODS
    # these methods are designed to be largely workhorse methods that
    # are not intended to be overridden.  That doesn't mean you can't;
    # if you want to, feel free, but most things can be done by
    # by overriding the configuration methods :)

    def _join_url(self, base_url, rel_url):
        """Join a mirror base url and a relative url with exactly one '/'.

        A slash is inserted when neither side provides one, and a
        duplicate is dropped when both sides provide one.
        """
        base_has_slash = base_url.endswith('/')
        rel_has_slash = rel_url.startswith('/')
        if base_has_slash and rel_has_slash:
            return base_url + rel_url[1:]
        if base_has_slash or rel_has_slash:
            return base_url + rel_url
        return base_url + '/' + rel_url

    def _mirror_try(self, func, url, kw):
        """Call grabber method 'func' for 'url' on mirrors until one succeeds.

        The loop ends when a grab returns, when _get_mirror raises
        because no mirrors are left, or when _failure re-raises.
        Note: the MirrorGroup-only options are removed from 'kw' in place.
        """
        gr = GrabRequest()
        gr.func = func
        gr.url = url
        gr.kw = dict(kw)
        self._load_gr(gr)

        # the grabber must not see the MirrorGroup-specific options
        for k in self.options:
            kw.pop(k, None)

        while 1:
            mirrorchoice = self._get_mirror(gr)
            fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
            # per-call options override the per-mirror kwargs
            kwargs = dict(mirrorchoice.get('kwargs', {}))
            kwargs.update(kw)
            grabber = mirrorchoice.get('grabber') or self.grabber
            func_ref = getattr(grabber, func)
            if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
            try:
                return func_ref(fullurl, **kwargs)
            except URLGrabError as e:
                if DEBUG: DEBUG.info('MIRROR: failed')
                obj = CallbackObject()
                obj.exception = e
                obj.mirror = mirrorchoice['mirror']
                obj.relative_url = gr.url
                obj.url = fullurl
                self._failure(gr, obj)

    def urlgrab(self, url, filename=None, **kwargs):
        """Run the grabber's urlgrab for the relative url, with failover."""
        kw = dict(kwargs)
        kw['filename'] = filename
        func = 'urlgrab'
        return self._mirror_try(func, url, kw)

    def urlopen(self, url, **kwargs):
        """Run the grabber's urlopen for the relative url, with failover."""
        kw = dict(kwargs)
        func = 'urlopen'
        return self._mirror_try(func, url, kw)

    def urlread(self, url, limit=None, **kwargs):
        """Run the grabber's urlread for the relative url, with failover."""
        kw = dict(kwargs)
        kw['limit'] = limit
        func = 'urlread'
        return self._mirror_try(func, url, kw)
423 | |||
424 | |||
class MGRandomStart(MirrorGroup):
    """A mirror group that starts at a random mirror in the list.

    This behavior of this class is identical to MirrorGroup, except that
    it starts at a random location in the mirror list.
    """

    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the object

        The arguments for initialization are the same as for MirrorGroup
        """
        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
        # random.randrange(0) raises ValueError, so an empty mirror list
        # keeps the start index set by MirrorGroup.__init__.  The parsed
        # list is measured rather than the raw argument so that any
        # iterable of mirrors is accepted.
        if self.mirrors:
            self._next = random.randrange(len(self.mirrors))
439 | |||
class MGRandomOrder(MirrorGroup):
    """A mirror group that walks through its mirrors in a random order.

    Apart from the ordering, this class behaves exactly like
    MirrorGroup.  The order is chosen once, at initialization time, and
    stays fixed afterwards; a new random mirror is NOT picked after
    each failure.
    """

    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the object

        Takes the same arguments as MirrorGroup.
        """
        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
        # shuffle the master list in place
        master = self.mirrors
        random.shuffle(master)
456 | |||
# Running this module as a script does nothing.
if __name__ == '__main__':
    pass