diff options
| author | Adrian Dudau <adrian.dudau@enea.com> | 2013-12-12 13:38:32 +0100 |
|---|---|---|
| committer | Adrian Dudau <adrian.dudau@enea.com> | 2013-12-12 13:50:20 +0100 |
| commit | e2e6f6fe07049f33cb6348780fa975162752e421 (patch) | |
| tree | b1813295411235d1297a0ed642b1346b24fdfb12 /scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py | |
| download | poky-e2e6f6fe07049f33cb6348780fa975162752e421.tar.gz | |
initial commit of Enea Linux 3.1
Migrated from the internal git server on the dora-enea branch
Signed-off-by: Adrian Dudau <adrian.dudau@enea.com>
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py')
| -rw-r--r-- | scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py | 458 |
1 files changed, 458 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py new file mode 100644 index 0000000000..9664c6b5c5 --- /dev/null +++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/mirror.py | |||
| @@ -0,0 +1,458 @@ | |||
| 1 | # This library is free software; you can redistribute it and/or | ||
| 2 | # modify it under the terms of the GNU Lesser General Public | ||
| 3 | # License as published by the Free Software Foundation; either | ||
| 4 | # version 2.1 of the License, or (at your option) any later version. | ||
| 5 | # | ||
| 6 | # This library is distributed in the hope that it will be useful, | ||
| 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 9 | # Lesser General Public License for more details. | ||
| 10 | # | ||
| 11 | # You should have received a copy of the GNU Lesser General Public | ||
| 12 | # License along with this library; if not, write to the | ||
| 13 | # Free Software Foundation, Inc., | ||
| 14 | # 59 Temple Place, Suite 330, | ||
| 15 | # Boston, MA 02111-1307 USA | ||
| 16 | |||
| 17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
| 18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
| 19 | |||
| 20 | """Module for downloading files from a pool of mirrors | ||
| 21 | |||
| 22 | DESCRIPTION | ||
| 23 | |||
| 24 | This module provides support for downloading files from a pool of | ||
| 25 | mirrors with configurable failover policies. To a large extent, the | ||
| 26 | failover policy is chosen by using different classes derived from | ||
| 27 | the main class, MirrorGroup. | ||
| 28 | |||
| 29 | Instances of MirrorGroup (and cousins) act very much like URLGrabber | ||
| 30 | instances in that they have urlread, urlgrab, and urlopen methods. | ||
| 31 | They can, therefore, be used in very similar ways. | ||
| 32 | |||
| 33 | from urlgrabber.grabber import URLGrabber | ||
| 34 | from urlgrabber.mirror import MirrorGroup | ||
| 35 | gr = URLGrabber() | ||
| 36 | mg = MirrorGroup(gr, ['http://foo.com/some/directory/', | ||
| 37 | 'http://bar.org/maybe/somewhere/else/', | ||
| 38 | 'ftp://baz.net/some/other/place/entirely/']) | ||
| 39 | mg.urlgrab('relative/path.zip') | ||
| 40 | |||
| 41 | The assumption is that all mirrors are identical AFTER the base urls | ||
| 42 | specified, so that any mirror can be used to fetch any file. | ||
| 43 | |||
| 44 | FAILOVER | ||
| 45 | |||
| 46 | The failover mechanism is designed to be customized by subclassing | ||
| 47 | from MirrorGroup to change the details of the behavior. In general, | ||
| 48 | the classes maintain a master mirror list and a "current mirror" | ||
| 49 | index. When a download is initiated, a copy of this list and index | ||
| 50 | is created for that download only. The specific failover policy | ||
| 51 | depends on the class used, and so is documented in the class | ||
| 52 | documentation. Note that ANY behavior of the class can be | ||
| 53 | overridden, so any failover policy at all is possible (although | ||
| 54 | you may need to change the interface in extreme cases). | ||
| 55 | |||
| 56 | CUSTOMIZATION | ||
| 57 | |||
| 58 | Most customization of a MirrorGroup object is done at instantiation | ||
| 59 | time (or via subclassing). There are four major types of | ||
| 60 | customization: | ||
| 61 | |||
| 62 | 1) Pass in a custom urlgrabber - The passed in urlgrabber will be | ||
| 63 | used (by default... see #2) for the grabs, so options to it | ||
| 64 | apply for the url-fetching | ||
| 65 | |||
| 66 | 2) Custom mirror list - Mirror lists can simply be a list of | ||
| 67 | string mirrors (as shown in the example above) but each can | ||
| 68 | also be a dict, allowing for more options. For example, the | ||
| 69 | first mirror in the list above could also have been: | ||
| 70 | |||
| 71 | {'mirror': 'http://foo.com/some/directory/', | ||
| 72 | 'grabber': <a custom grabber to be used for this mirror>, | ||
| 73 | 'kwargs': { <a dict of arguments passed to the grabber> }} | ||
| 74 | |||
| 75 | All mirrors are converted to this format internally. If | ||
| 76 | 'grabber' is omitted, the default grabber will be used. If | ||
| 77 | kwargs are omitted, then (duh) they will not be used. | ||
| 78 | |||
| 79 | 3) Pass keyword arguments when instantiating the mirror group. | ||
| 80 | See, for example, the failure_callback argument. | ||
| 81 | |||
| 82 | 4) Finally, any kwargs passed in for the specific file (to the | ||
| 83 | urlgrab method, for example) will be folded in. The options | ||
| 84 | passed into the grabber's urlXXX methods will override any | ||
| 85 | options specified in a custom mirror dict. | ||
| 86 | |||
| 87 | """ | ||
| 88 | |||
| 89 | # $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $ | ||
| 90 | |||
| 91 | import random | ||
| 92 | import thread # needed for locking to make this threadsafe | ||
| 93 | |||
| 94 | from grabber import URLGrabError, CallbackObject, DEBUG | ||
| 95 | |||
# Use the package's translation function when the i18n module is importable;
# otherwise fall back to an identity function so that messages wrapped in
# _() are still usable (untranslated).
try:
    from i18n import _
except ImportError:
    def _(st):
        """Return *st* unchanged (fallback used when i18n cannot be imported)."""
        return st
| 100 | |||
class GrabRequest:
    """Plain attribute container describing one specific request.

    A request is, for example, the download of a single file.  The
    mirror group fills in attributes such as func, url, kw, mirrors and
    _next on a fresh instance for every download.  Keeping this state
    apart from the mirror group serves two purposes:

      1) it makes thread safety a little easier to achieve
      2) it allows request-specific parameters
    """
| 110 | |||
class MirrorGroup:
    """Base Mirror class

    Instances of this class are built with a grabber object and a list
    of mirrors.  Then all calls to urlXXX should be passed relative urls.
    The requested file will be searched for on the current mirror.  If
    the grabber raises an exception (possibly after some retries) then
    that mirror will be removed from the list, and the next will be
    attempted.  If all mirrors are exhausted, then an exception will be
    raised.

    MirrorGroup has the following failover policy:

      * downloads begin with the mirror at the master index (initially
        the first mirror)

      * by default (see default_action below) a failure (after retries)
        causes it to increment the local AND master indices.  Also,
        the current mirror is removed from the local list (but NOT the
        master list - the mirror can potentially be used for other
        files)

      * if the local list is ever exhausted, a URLGrabError will be
        raised (errno=256, no more mirrors)

    OPTIONS

      In addition to the required arguments "grabber" and "mirrors",
      MirrorGroup also takes the following optional arguments:

      default_action

        A dict that describes the actions to be taken upon failure
        (after retries).  default_action can contain any of the
        following keys (shown here with their default values):

          default_action = {'increment': 1,
                            'increment_master': 1,
                            'remove': 1,
                            'remove_master': 0,
                            'fail': 0}

        In this context, 'increment' means "use the next mirror" and
        'remove' means "never use this mirror again".  The two
        'master' values refer to the instance-level mirror list (used
        for all files), whereas the non-master values refer to the
        current download only.

        The 'fail' option will cause immediate failure by re-raising
        the exception and no further attempts to get the current
        download.

        This dict can be set at instantiation time,
          mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
        at method-execution time (only applies to current fetch),
          filename = mg.urlgrab(url, default_action={'increment': 0})
        or by returning an action dict from the failure_callback
          return {'fail':0}
        in increasing precedence.

        If all three of these were done, the net result would be:
              {'increment': 0,         # set in method
               'increment_master': 1,  # class default
               'remove': 1,            # class default
               'remove_master': 0,     # class default
               'fail': 0}              # set at instantiation, reset
                                       # from callback

      failure_callback

        this is a callback that will be called when a mirror "fails",
        meaning the grabber raises some URLGrabError.  If this is a
        tuple, it is interpreted to be of the form (cb, args, kwargs)
        where cb is the actual callable object (function, method,
        etc).  Otherwise, it is assumed to be the callable object
        itself.  The callback will be passed a grabber.CallbackObject
        instance along with args and kwargs (if present).  The following
        attributes are defined within the instance:

           obj.exception    = < exception that was raised >
           obj.mirror       = < the mirror that was tried >
           obj.relative_url = < url relative to the mirror >
           obj.url          = < full url that failed >
                              # .url is just the combination of .mirror
                              # and .relative_url

        The failure callback can return an action dict, as described
        above.

        Like default_action, the failure_callback can be set at
        instantiation time or when the urlXXX method is called.  In
        the latter case, it applies only for that fetch.

        The callback can re-raise the exception quite easily.  For
        example, this is a perfectly adequate callback function:

          def callback(obj): raise obj.exception

        WARNING: do not save the exception object (or the
        CallbackObject instance).  As they contain stack frame
        references, they can lead to circular references.

    Notes:
      * The behavior can be customized by deriving and overriding the
        'CONFIGURATION METHODS'
      * The 'grabber' instance is kept as a reference, not copied.
        Therefore, the grabber instance can be modified externally
        and changes will take effect immediately.
    """

    # notes on thread-safety:

    #   A GrabRequest should never be shared by multiple threads because
    #   it's never saved inside the MG object and never returned outside it.
    #   Therefore, it should be safe to access/modify grabrequest data
    #   without a lock.  However, accessing the mirrors and _next attributes
    #   of the MG itself must be done when locked to prevent (for example)
    #   removal of the wrong mirror.

    ##############################################################
    #  CONFIGURATION METHODS  -  intended to be overridden to
    #                            customize behavior
    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the MirrorGroup object.

        REQUIRED ARGUMENTS

          grabber  - URLGrabber instance
          mirrors  - a list of mirrors

        OPTIONAL ARGUMENTS

          failure_callback  - callback to be used when a mirror fails
          default_action    - dict of failure actions

        See the module-level and class level documentation for more
        details.
        """

        # OVERRIDE IDEAS:
        #   shuffle the list to randomize order
        self.grabber = grabber
        self.mirrors = self._parse_mirrors(mirrors)
        self._next = 0
        self._lock = thread.allocate_lock()
        self.default_action = None
        self._process_kwargs(kwargs)

    # if these values are found in **kwargs passed to one of the urlXXX
    # methods, they will be stripped before getting passed on to the
    # grabber
    options = ['default_action', 'failure_callback']

    def _process_kwargs(self, kwargs):
        """Store the instance-level failure_callback and default_action.

        Keys that are absent from kwargs leave the attribute set to None.
        """
        self.failure_callback = kwargs.get('failure_callback')
        self.default_action = kwargs.get('default_action')

    def _parse_mirrors(self, mirrors):
        """Return a new list with every mirror in dict form.

        String entries become {'mirror': <string>}; all other entries
        are passed through unchanged.
        """
        try:
            string_types = basestring   # Python 2: covers str and unicode
        except NameError:
            string_types = str
        parsed_mirrors = []
        for m in mirrors:
            # isinstance (rather than an exact type comparison) also
            # accepts unicode strings and str subclasses, which would
            # otherwise be stored raw and fail later on m['mirror']
            if isinstance(m, string_types):
                m = {'mirror': m}
            parsed_mirrors.append(m)
        return parsed_mirrors

    def _load_gr(self, gr):
        """Copy the master mirror list and index onto the request gr."""
        # OVERRIDE IDEAS:
        #   shuffle gr list
        # 'with' guarantees the lock is released even if copying raises
        with self._lock:
            gr.mirrors = list(self.mirrors)
            gr._next = self._next

    def _get_mirror(self, gr):
        """Return the mirror dict the request gr should try next.

        Raises URLGrabError (errno 256) when gr has no mirrors left.
        """
        # OVERRIDE IDEAS:
        #   return a random mirror so that multiple mirrors get used
        #   even without failures.
        if not gr.mirrors:
            raise URLGrabError(256, _('No more mirrors to try.'))
        return gr.mirrors[gr._next]

    def _failure(self, gr, cb_obj):
        """Handle a failed attempt: run the callback, then apply the action.

        Must be called from inside the 'except' block that caught the
        grabber's exception, because the 'fail' action re-raises the
        exception currently being handled.
        """
        # OVERRIDE IDEAS:
        #   inspect the error - remove=1 for 404, remove=2 for connection
        #                       refused, etc. (this can also be done via
        #                       the callback)
        cb = gr.kw.get('failure_callback') or self.failure_callback
        if cb:
            if isinstance(cb, tuple):
                cb, args, kwargs = cb
            else:
                args, kwargs = (), {}
            action = cb(cb_obj, *args, **kwargs) or {}
        else:
            action = {}
        # Each key falls through individually, in increasing precedence:
        # instance default_action, then the per-call default_action, then
        # the dict returned by the callback.  (The alternative - taking
        # one of those dicts as a whole - was considered and not used.)
        a = dict(self.default_action or {})
        # 'or {}' tolerates an explicit default_action=None from the caller
        a.update(gr.kw.get('default_action') or {})
        a.update(action)
        action = a
        self.increment_mirror(gr, action)
        if action.get('fail', 0):
            # bare raise: propagate the original exception with its traceback
            raise

    def increment_mirror(self, gr, action=None):
        """Tell the mirror object to increment the mirror index

        This increments the mirror index, which amounts to telling the
        mirror object to use a different mirror (for this and future
        downloads).

        This is a SEMI-public method.  It will be called internally,
        and you may never need to call it.  However, it is provided
        (and is made public) so that the calling program can increment
        the mirror choice for methods like urlopen.  For example, with
        urlopen, there's no good way for the mirror group to know that
        an error occurs mid-download (it's already returned and given
        you the file object).

        action is a dict; the keys read here (with the defaults used
        when a key is missing) are:

          remove_master    (0)  delete the mirror from the master list
          increment_master (1)  advance the master index, but only if it
                                currently points at the failed mirror and
                                remove_master is not set
          remove           (1)  delete the mirror from this request's list
          increment        (1)  advance this request's index; only
                                consulted when remove is not set

        Both indices wrap around to 0 when they run off the end.

        beware of remove=0 combined with increment=0, as the same mirror
        is then retried forever
        """
        if action is None:
            action = {}
        badmirror = gr.mirrors[gr._next]

        with self._lock:
            try:
                ind = self.mirrors.index(badmirror)
            except ValueError:
                # the mirror is no longer in the master list; nothing
                # to update there
                pass
            else:
                if action.get('remove_master', 0):
                    del self.mirrors[ind]
                elif self._next == ind and action.get('increment_master', 1):
                    self._next += 1
                if self._next >= len(self.mirrors):
                    self._next = 0

        if action.get('remove', 1):
            del gr.mirrors[gr._next]
        elif action.get('increment', 1):
            gr._next += 1
        if gr._next >= len(gr.mirrors):
            gr._next = 0

        if DEBUG:
            grm = [m['mirror'] for m in gr.mirrors]
            DEBUG.info('GR   mirrors: [%s] %i', ' '.join(grm), gr._next)
            selfm = [m['mirror'] for m in self.mirrors]
            DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)

    #####################################################################
    # NON-CONFIGURATION METHODS
    # these methods are designed to be largely workhorse methods that
    # are not intended to be overridden.  That doesn't mean you can't;
    # if you want to, feel free, but most things can be done by
    # overriding the configuration methods :)

    def _join_url(self, base_url, rel_url):
        """Concatenate base_url and rel_url, adding '/' if neither has one.

        No slash is removed: if base_url ends with '/' and rel_url also
        starts with '/', the result contains both.
        """
        if base_url.endswith('/') or rel_url.startswith('/'):
            return base_url + rel_url
        else:
            return base_url + '/' + rel_url

    def _mirror_try(self, func, url, kw):
        """Call grabber method func for url on mirrors until one succeeds.

        func is the name of the grabber method ('urlgrab', 'urlopen' or
        'urlread'), url is relative to the mirror, and kw holds the
        keyword arguments for the call.  Returns whatever the grabber
        method returns.  A URLGrabError from the grabber is routed
        through _failure and the loop continues; the loop ends when
        _get_mirror or _failure raises.
        """
        gr = GrabRequest()
        gr.func = func
        gr.url = url
        # snapshot taken before stripping, so _failure can still see the
        # per-call failure_callback / default_action
        gr.kw = dict(kw)
        self._load_gr(gr)

        # mirror-group options must not be forwarded to the grabber
        for k in self.options:
            kw.pop(k, None)

        while 1:
            mirrorchoice = self._get_mirror(gr)
            fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
            # per-mirror kwargs first, so per-call kwargs override them
            kwargs = dict(mirrorchoice.get('kwargs', {}))
            kwargs.update(kw)
            grabber = mirrorchoice.get('grabber') or self.grabber
            func_ref = getattr(grabber, func)
            if DEBUG:
                DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
            try:
                return func_ref(fullurl, **kwargs)
            except URLGrabError as e:
                if DEBUG:
                    DEBUG.info('MIRROR: failed')
                obj = CallbackObject()
                obj.exception = e
                obj.mirror = mirrorchoice['mirror']
                obj.relative_url = gr.url
                obj.url = fullurl
                self._failure(gr, obj)

    def urlgrab(self, url, filename=None, **kwargs):
        """Run the grabber's urlgrab for the relative url with failover."""
        kw = dict(kwargs)
        kw['filename'] = filename
        func = 'urlgrab'
        return self._mirror_try(func, url, kw)

    def urlopen(self, url, **kwargs):
        """Run the grabber's urlopen for the relative url with failover."""
        kw = dict(kwargs)
        func = 'urlopen'
        return self._mirror_try(func, url, kw)

    def urlread(self, url, limit=None, **kwargs):
        """Run the grabber's urlread for the relative url with failover."""
        kw = dict(kwargs)
        kw['limit'] = limit
        func = 'urlread'
        return self._mirror_try(func, url, kw)
| 423 | |||
| 424 | |||
class MGRandomStart(MirrorGroup):
    """A mirror group that starts at a random mirror in the list.

    The behavior of this class is identical to MirrorGroup, except that
    it starts at a random location in the mirror list.
    """

    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the object

        The arguments for initialization are the same as for MirrorGroup
        """
        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
        # Use the parsed list rather than the raw argument, and leave the
        # index at 0 for an empty list: random.randrange(0) raises
        # ValueError, whereas the base class reports an empty mirror list
        # as URLGrabError 256 when a download is attempted.
        if self.mirrors:
            self._next = random.randrange(len(self.mirrors))
| 439 | |||
class MGRandomOrder(MirrorGroup):
    """A mirror group that walks its mirrors in a random order.

    Apart from the ordering, this class behaves exactly like
    MirrorGroup.  The order is chosen once, at initialization time, and
    stays fixed afterwards; a new random mirror is NOT picked after each
    failure.
    """

    def __init__(self, grabber, mirrors, **kwargs):
        """Initialize the object

        Takes the same arguments as MirrorGroup.
        """
        MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
        # shuffle the instance's master list in place
        random.shuffle(self.mirrors)
| 456 | |||
# Running this module as a script intentionally does nothing.
if __name__ == '__main__':
    pass
