summaryrefslogtreecommitdiffstats
path: root/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py')
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py458
1 files changed, 458 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
new file mode 100644
index 0000000000..9664c6b5c5
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py
@@ -0,0 +1,458 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20"""Module for downloading files from a pool of mirrors
21
22DESCRIPTION
23
24 This module provides support for downloading files from a pool of
25 mirrors with configurable failover policies. To a large extent, the
26 failover policy is chosen by using different classes derived from
27 the main class, MirrorGroup.
28
29 Instances of MirrorGroup (and cousins) act very much like URLGrabber
30 instances in that they have urlread, urlgrab, and urlopen methods.
31 They can, therefore, be used in very similar ways.
32
33 from urlgrabber.grabber import URLGrabber
34 from urlgrabber.mirror import MirrorGroup
35 gr = URLGrabber()
36 mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
37 'http://bar.org/maybe/somewhere/else/',
38 'ftp://baz.net/some/other/place/entirely/'])
39 mg.urlgrab('relative/path.zip')
40
41 The assumption is that all mirrors are identical AFTER the base urls
42 specified, so that any mirror can be used to fetch any file.
43
44FAILOVER
45
46 The failover mechanism is designed to be customized by subclassing
47 from MirrorGroup to change the details of the behavior. In general,
48 the classes maintain a master mirror list and a "current mirror"
49 index. When a download is initiated, a copy of this list and index
50 is created for that download only. The specific failover policy
51 depends on the class used, and so is documented in the class
52 documentation. Note that ANY behavior of the class can be
53 overridden, so any failover policy at all is possible (although
54 you may need to change the interface in extreme cases).
55
56CUSTOMIZATION
57
58 Most customization of a MirrorGroup object is done at instantiation
59 time (or via subclassing). There are four major types of
60 customization:
61
62 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
63 used (by default... see #2) for the grabs, so options to it
64 apply for the url-fetching
65
66 2) Custom mirror list - Mirror lists can simply be a list of
67 string mirrors (as shown in the example above) but each can
68 also be a dict, allowing for more options. For example, the
69 first mirror in the list above could also have been:
70
71 {'mirror': 'http://foo.com/some/directory/',
72 'grabber': <a custom grabber to be used for this mirror>,
73 'kwargs': { <a dict of arguments passed to the grabber> }}
74
75 All mirrors are converted to this format internally. If
76 'grabber' is omitted, the default grabber will be used. If
77 kwargs are omitted, then (duh) they will not be used.
78
79 3) Pass keyword arguments when instantiating the mirror group.
80 See, for example, the failure_callback argument.
81
82 4) Finally, any kwargs passed in for the specific file (to the
83 urlgrab method, for example) will be folded in. The options
84 passed into the grabber's urlXXX methods will override any
85 options specified in a custom mirror dict.
86
87"""
88
89# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $
90
91import random
92import thread # needed for locking to make this threadsafe
93
94from grabber import URLGrabError, CallbackObject, DEBUG
95
96try:
97 from i18n import _
98except ImportError, msg:
99 def _(st): return st
100
class GrabRequest:
    """Bare attribute container describing one individual download.

    The class defines no attributes or methods of its own; the code that
    creates an instance attaches whatever per-request state it needs.
    Keeping that state on a separate object (rather than on the mirror
    group) serves two purposes:

      1) it makes thread-safety a little easier to achieve
      2) it allows parameters that apply to a single request only
    """
110
class MirrorGroup:
    """Base Mirror class

    Instances of this class are built with a grabber object and a list
    of mirrors.  Then all calls to urlXXX should be passed relative urls.
    The requested file will be searched for on the first mirror.  If the
    grabber raises an exception (possibly after some retries) then that
    mirror will be removed from the list, and the next will be attempted.
    If all mirrors are exhausted, then an exception will be raised.

    MirrorGroup has the following failover policy:

      * downloads begin with the first mirror

      * by default (see default_action below) a failure (after retries)
        causes it to increment the local AND master indices.  Also,
        the current mirror is removed from the local list (but NOT the
        master list - the mirror can potentially be used for other
        files)

      * if the local list is ever exhausted, a URLGrabError will be
        raised (errno=256, no more mirrors)

    OPTIONS

      In addition to the required arguments "grabber" and "mirrors",
      MirrorGroup also takes the following optional arguments:

      default_action

        A dict that describes the actions to be taken upon failure
        (after retries).  default_action can contain any of the
        following keys (shown here with their default values):

          default_action = {'increment': 1,
                            'increment_master': 1,
                            'remove': 1,
                            'remove_master': 0,
                            'fail': 0}

        In this context, 'increment' means "use the next mirror" and
        'remove' means "never use this mirror again".  The two
        'master' values refer to the instance-level mirror list (used
        for all files), whereas the non-master values refer to the
        current download only.

        The 'fail' option will cause immediate failure by re-raising
        the exception and no further attempts to get the current
        download.

        This dict can be set at instantiation time,
          mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
        at method-execution time (only applies to current fetch),
          filename = mg.urlgrab(url, default_action={'increment': 0})
        or by returning an action dict from the failure_callback
          return {'fail':0}
        in increasing precedence.

        If all three of these were done, the net result would be:
              {'increment': 0,         # set in method
               'increment_master': 1,  # class default
               'remove': 1,            # class default
               'remove_master': 0,     # class default
               'fail': 0}              # set at instantiation, reset
                                       # from callback

      failure_callback

        this is a callback that will be called when a mirror "fails",
        meaning the grabber raises some URLGrabError.  If this is a
        tuple, it is interpreted to be of the form (cb, args, kwargs)
        where cb is the actual callable object (function, method,
        etc).  Otherwise, it is assumed to be the callable object
        itself.  The callback will be passed a grabber.CallbackObject
        instance along with args and kwargs (if present).  The following
        attributes are defined within the instance:

           obj.exception    = < exception that was raised >
           obj.mirror       = < the mirror that was tried >
           obj.relative_url = < url relative to the mirror >
           obj.url          = < full url that failed >
                              # .url is just the combination of .mirror
                              # and .relative_url

        The failure callback can return an action dict, as described
        above.

        Like default_action, the failure_callback can be set at
        instantiation time or when the urlXXX method is called.  In
        the latter case, it applies only for that fetch.

        The callback can re-raise the exception quite easily.  For
        example, this is a perfectly adequate callback function:

          def callback(obj): raise obj.exception

        WARNING: do not save the exception object (or the
        CallbackObject instance).  As they contain stack frame
        references, they can lead to circular references.

    Notes:
      * The behavior can be customized by deriving and overriding the
        'CONFIGURATION METHODS'
      * The 'grabber' instance is kept as a reference, not copied.
        Therefore, the grabber instance can be modified externally
        and changes will take effect immediately.
    """

    # notes on thread-safety:

    #   A GrabRequest should never be shared by multiple threads because
    #   it's never saved inside the MG object and never returned outside it.
    #   therefore, it should be safe to access/modify grabrequest data
    #   without a lock.  However, accessing the mirrors and _next attributes
    #   of the MG itself must be done when locked to prevent (for example)
    #   removal of the wrong mirror.

    ##############################################################
    #  CONFIGURATION METHODS  -  intended to be overridden to
    #                            customize behavior
    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the MirrorGroup object.

        REQUIRED ARGUMENTS

          grabber  - URLGrabber instance
          mirrors  - a list of mirrors

        OPTIONAL ARGUMENTS

          failure_callback  - callback to be used when a mirror fails
          default_action    - dict of failure actions

        See the module-level and class level documentation for more
        details.
        """

        # OVERRIDE IDEAS:
        #   shuffle the list to randomize order
        self.grabber = grabber
        self.mirrors = self._parse_mirrors(mirrors)
        self._next = 0
        self._lock = thread.allocate_lock()
        # Pre-set so the attribute exists even if a subclass overrides
        # _process_kwargs without assigning it.
        self.default_action = None
        self._process_kwargs(kwargs)

    # if these values are found in **kwargs passed to one of the urlXXX
    # methods, they will be stripped before getting passed on to the
    # grabber
    options = ['default_action', 'failure_callback']

    def _process_kwargs(self, kwargs):
        """Store the instance-level options found in kwargs.

        Both attributes are set to None when the matching key is absent.
        """
        self.failure_callback = kwargs.get('failure_callback')
        self.default_action = kwargs.get('default_action')

    def _parse_mirrors(self, mirrors):
        """Return a new list with every mirror in dict form.

        String entries are wrapped as {'mirror': <string>}; any other
        entry (expected to already be a mirror dict) is passed through
        unchanged.
        """
        try:
            string_types = basestring  # Python 2: both str and unicode
        except NameError:
            string_types = str  # Python 3
        parsed_mirrors = []
        for m in mirrors:
            # isinstance (rather than an exact type comparison) also
            # accepts unicode strings and str subclasses, which would
            # otherwise stay unwrapped and break on m['mirror'] later.
            if isinstance(m, string_types):
                m = {'mirror': m}
            parsed_mirrors.append(m)
        return parsed_mirrors

    def _load_gr(self, gr):
        """Copy the master mirror list and index onto the request gr."""
        # OVERRIDE IDEAS:
        #   shuffle gr list
        # 'with' guarantees the lock is released even if copying fails.
        with self._lock:
            gr.mirrors = list(self.mirrors)
            gr._next = self._next

    def _get_mirror(self, gr):
        """Return the mirror dict the request gr should try next.

        Raises URLGrabError (errno 256) when the request's mirror list
        is empty.
        """
        # OVERRIDE IDEAS:
        #   return a random mirror so that multiple mirrors get used
        #   even without failures.
        if not gr.mirrors:
            raise URLGrabError(256, _('No more mirrors to try.'))
        return gr.mirrors[gr._next]

    def _failure(self, gr, cb_obj):
        """Handle a failed attempt: run the callback, apply the action.

        gr is the current request and cb_obj the CallbackObject that
        describes the failure.  The effective action dict is built from
        the instance default_action, then the per-call default_action,
        then whatever the failure callback returned (later sources win
        key by key).  If the resulting action has a true 'fail' value,
        the exception currently being handled is re-raised.
        """
        # OVERRIDE IDEAS:
        #   inspect the error - remove=1 for 404, remove=2 for connection
        #                       refused, etc. (this can also be done via
        #                       the callback)
        cb = gr.kw.get('failure_callback') or self.failure_callback
        if cb:
            if isinstance(cb, tuple):
                cb, args, kwargs = cb
            else:
                args, kwargs = (), {}
            action = cb(cb_obj, *args, **kwargs) or {}
        else:
            action = {}
        # XXXX - decide - there are two ways to do this
        # the first is action-overriding as a whole - use the entire action
        # or fall back on module level defaults
        #action = action or gr.kw.get('default_action') or self.default_action
        # the other is to fall through for each element in the action dict
        a = dict(self.default_action or {})
        # "or {}" also covers an explicit default_action=None from the
        # caller, which dict.update() would reject.
        a.update(gr.kw.get('default_action') or {})
        a.update(action)
        action = a
        self.increment_mirror(gr, action)
        # Bare raise: this method runs inside the "except URLGrabError"
        # handler of _mirror_try, so it re-raises that exception with its
        # original traceback.
        if action.get('fail', 0): raise

    def increment_mirror(self, gr, action=None):
        """Tell the mirror object increment the mirror index

        This increments the mirror index, which amounts to telling the
        mirror object to use a different mirror (for this and future
        downloads).

        This is a SEMI-public method.  It will be called internally,
        and you may never need to call it.  However, it is provided
        (and is made public) so that the calling program can increment
        the mirror choice for methods like urlopen.  For example, with
        urlopen, there's no good way for the mirror group to know that
        an error occurs mid-download (it's already returned and given
        you the file object).

        action is a dict (None means an empty dict) whose keys are read
        with these defaults:

          remove_master    (0)  delete the mirror from the master list
          increment_master (1)  advance the master index, but only if it
                                currently points at the failed mirror and
                                remove_master is not set
          remove           (1)  delete the mirror from this request's list
          increment        (1)  advance this request's index (only
                                consulted when remove is false)

        Both indices wrap around to 0 when they run off the end.

        beware of remove=0 as it can lead to infinite loops
        """
        if action is None:
            action = {}
        badmirror = gr.mirrors[gr._next]

        # The master list and index are shared between threads, so they
        # are only touched while the lock is held; 'with' releases the
        # lock even if an exception escapes.
        with self._lock:
            try:
                ind = self.mirrors.index(badmirror)
            except ValueError:
                # the mirror is no longer in the master list
                pass
            else:
                if action.get('remove_master', 0):
                    del self.mirrors[ind]
                elif self._next == ind and action.get('increment_master', 1):
                    self._next += 1
                if self._next >= len(self.mirrors): self._next = 0

        if action.get('remove', 1):
            # deleting the entry makes gr._next point at its successor
            del gr.mirrors[gr._next]
        elif action.get('increment', 1):
            gr._next += 1
        if gr._next >= len(gr.mirrors): gr._next = 0

        if DEBUG:
            grm = [m['mirror'] for m in gr.mirrors]
            DEBUG.info('GR   mirrors: [%s] %i', ' '.join(grm), gr._next)
            selfm = [m['mirror'] for m in self.mirrors]
            DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)

    #####################################################################
    # NON-CONFIGURATION METHODS
    # these methods are designed to be largely workhorse methods that
    # are not intended to be overridden.  That doesn't mean you can't;
    # if you want to, feel free, but most things can be done
    # by overriding the configuration methods :)

    def _join_url(self, base_url, rel_url):
        """Concatenate base_url and rel_url.

        A '/' is inserted only when base_url does not end with one and
        rel_url does not start with one.  If both have one, the result
        keeps both slashes.
        """
        if base_url.endswith('/') or rel_url.startswith('/'):
            return base_url + rel_url
        else:
            return base_url + '/' + rel_url

    def _mirror_try(self, func, url, kw):
        """Call grabber method func for url on mirrors until one succeeds.

        func is the name of the grabber method ('urlgrab', 'urlopen' or
        'urlread'), url is relative to the mirror base and kw holds the
        keyword arguments for the call.  Returns whatever the grabber
        method returns.  A URLGrabError from the grabber is passed to
        _failure(), which either re-raises it or lets the loop try the
        next mirror; _get_mirror() raises URLGrabError once the
        request's mirror list is empty.
        """
        gr = GrabRequest()
        gr.func = func
        gr.url = url
        # The request keeps the full kwargs, including the mirror-group
        # options that _failure() reads back.
        gr.kw = dict(kw)
        self._load_gr(gr)

        # Strip the mirror-group options from a private copy, so the
        # caller's dict is not modified and the grabber never sees them.
        grab_kw = dict(kw)
        for k in self.options:
            grab_kw.pop(k, None)

        while True:
            mirrorchoice = self._get_mirror(gr)
            fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
            # per-mirror kwargs first, so per-call kwargs override them
            kwargs = dict(mirrorchoice.get('kwargs', {}))
            kwargs.update(grab_kw)
            grabber = mirrorchoice.get('grabber') or self.grabber
            func_ref = getattr(grabber, func)
            if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
            try:
                return func_ref(fullurl, **kwargs)
            except URLGrabError as e:
                if DEBUG: DEBUG.info('MIRROR: failed')
                obj = CallbackObject()
                obj.exception = e
                obj.mirror = mirrorchoice['mirror']
                obj.relative_url = gr.url
                obj.url = fullurl
                self._failure(gr, obj)

    def urlgrab(self, url, filename=None, **kwargs):
        """Run the grabber's urlgrab for relative url with failover.

        filename is always forwarded to the grabber as a keyword
        argument (None when not given).
        """
        kw = dict(kwargs)
        kw['filename'] = filename
        func = 'urlgrab'
        return self._mirror_try(func, url, kw)

    def urlopen(self, url, **kwargs):
        """Run the grabber's urlopen for relative url with failover."""
        kw = dict(kwargs)
        func = 'urlopen'
        return self._mirror_try(func, url, kw)

    def urlread(self, url, limit=None, **kwargs):
        """Run the grabber's urlread for relative url with failover.

        limit is always forwarded to the grabber as a keyword argument
        (None when not given).
        """
        kw = dict(kwargs)
        kw['limit'] = limit
        func = 'urlread'
        return self._mirror_try(func, url, kw)
423
424
class MGRandomStart(MirrorGroup):
    """A mirror group that starts at a random mirror in the list.

    This behavior of this class is identical to MirrorGroup, except that
    it starts at a random location in the mirror list.
    """

    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the object

        The arguments for initialization are the same as for MirrorGroup.
        With an empty mirror list the start index is left at the value
        set by MirrorGroup.__init__.
        """
        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
        # Use the parsed list: it always supports len(), even when the
        # caller passed an iterable that does not.  Skip the draw when it
        # is empty, because random.randrange(0) raises ValueError.
        if self.mirrors:
            self._next = random.randrange(len(self.mirrors))
439
class MGRandomOrder(MirrorGroup):
    """MirrorGroup variant whose mirror list is shuffled once.

    Everything works as in MirrorGroup; the only difference is that the
    instance's mirror list is put into a random order when the object is
    created.  The order is not re-randomized afterwards, so a failure
    does not cause a fresh random pick.
    """

    def __init__(self, grabber, mirrors, **kwargs):
        """Set up the group, then randomize the order of its mirrors.

        Accepts exactly the arguments that MirrorGroup.__init__ accepts.
        """
        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
        # In-place shuffle of the instance's own list.
        random.shuffle(self.mirrors)
456
# Running this module directly as a script does nothing.
if __name__ == '__main__':
    pass