diff options
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py')
| -rw-r--r-- | scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py | 463 |
1 files changed, 463 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py new file mode 100644 index 0000000000..001b4e32d6 --- /dev/null +++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py | |||
| @@ -0,0 +1,463 @@ | |||
| 1 | # This library is free software; you can redistribute it and/or | ||
| 2 | # modify it under the terms of the GNU Lesser General Public | ||
| 3 | # License as published by the Free Software Foundation; either | ||
| 4 | # version 2.1 of the License, or (at your option) any later version. | ||
| 5 | # | ||
| 6 | # This library is distributed in the hope that it will be useful, | ||
| 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 9 | # Lesser General Public License for more details. | ||
| 10 | # | ||
| 11 | # You should have received a copy of the GNU Lesser General Public | ||
| 12 | # License along with this library; if not, write to the | ||
| 13 | # Free Software Foundation, Inc., | ||
| 14 | # 59 Temple Place, Suite 330, | ||
| 15 | # Boston, MA 02111-1307 USA | ||
| 16 | |||
| 17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
| 18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
| 19 | |||
| 20 | # $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $ | ||
| 21 | |||
| 22 | import os | ||
| 23 | import stat | ||
| 24 | import urllib | ||
| 25 | import urllib2 | ||
| 26 | import rfc822 | ||
| 27 | |||
| 28 | DEBUG = None | ||
| 29 | |||
| 30 | try: | ||
| 31 | from cStringIO import StringIO | ||
| 32 | except ImportError, msg: | ||
| 33 | from StringIO import StringIO | ||
| 34 | |||
class RangeError(IOError):
    """Signals that a requested byte range cannot be satisfied.

    Subclasses IOError, so callers that already handle IOError will
    also catch this.
    """
| 38 | |||
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that enables HTTP Range headers.

    Range is an HTTP feature to begin with, so all this class has to do
    is tell urllib2 that a "206 Partial Content" response from the
    server is what we expected, and report a "416" response as a
    RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Return a 206 Partial Content response as a regular result.

        The result is a urllib.addinfourl built from fp, hdrs and the
        full URL of req, with its code and msg attributes set to the
        values passed in.
        """
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Raise RangeError for HTTP's Range Not Satisfiable error."""
        raise RangeError('Requested Range Not Satisfiable')
| 69 | |||
| 70 | def http_error_416(self, req, fp, code, msg, hdrs): | ||
| 71 | # HTTP's Range Not Satisfiable error | ||
| 72 | raise RangeError('Requested Range Not Satisfiable') | ||
| 73 | |||
class HTTPSRangeHandler(HTTPRangeHandler):
    """Range header support for HTTPS.

    Both methods simply forward to the corresponding http_error_*
    method inherited from HTTPRangeHandler.
    """

    def https_error_206(self, req, fp, code, msg, hdrs):
        """Handle 206 Partial Content exactly as the HTTP handler does."""
        return self.http_error_206(req, fp, code, msg, hdrs)

    def https_error_416(self, req, fp, code, msg, hdrs):
        """Handle 416 Range Not Satisfiable exactly as the HTTP handler does.

        This must delegate to http_error_416 (which raises RangeError);
        calling https_error_416 here would recurse without end.
        """
        return self.http_error_416(req, fp, code, msg, hdrs)
| 82 | |||
class RangeableFileObject:
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls. This object effectively makes a file object look
    like it consists only of a range of bytes in the stream.

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(open('/etc/passwd', 'rb'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        """
        self.fo = fo
        # range_tuple_normalize() returns None for a range that covers
        # the whole file; treat that as (0, '') instead of failing on
        # the tuple unpacking below.
        normalized = range_tuple_normalize(rangetup) or (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate lookups of unknown attributes to the wrapped object.

        This effectively allows us to wrap at the instance level. Any
        attribute not found in _this_ object will be searched for in
        self.fo. This includes methods.
        """
        # __getattr__ only runs when normal lookup has failed. If 'fo'
        # itself is what is missing (instance created without running
        # __init__), evaluating self.fo below would re-enter this method
        # forever, so fail immediately instead.
        if name == 'fo':
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        This is different from fo.tell in that position 0 is the
        first byte position of the range tuple. For example, if
        this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        whence 0 is relative to the start of the range and whence 1 is
        relative to the current position; whence 2 raises IOError.
        The target is clamped to the last byte of the range. Only
        forward movement is possible, because _do_seek() asserts that
        the distance to move is non-negative.
        """
        assert whence in (0, 1, 2)
        if whence == 0:   # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1:   # relative seek
            realoffset = self.realpos + offset
        elif whence == 2:   # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')

        # do not allow seek past lastbyte in range
        if self.lastbyte and (realoffset >= self.lastbyte):
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read lines within the range.

        This method will limit the size read based on the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return the amount of data to read, limited by the range.

        When lastbyte is set, a negative size becomes "everything up to
        lastbyte" and a size that would cross lastbyte is cut down to
        the bytes remaining. When lastbyte is unset (''), size is
        returned unchanged.
        """
        if self.lastbyte:
            if size > -1:
                if ((self.realpos + size) >= self.lastbyte):
                    size = (self.lastbyte - self.realpos)
            else:
                size = (self.lastbyte - self.realpos)
        return size

    def _do_seek(self, offset):
        """Seek based on whether wrapped object supports seek().

        offset is relative to the current position (self.realpos) and
        must not be negative.
        """
        assert offset >= 0
        if not hasattr(self.fo, 'seek'):
            self._poor_mans_seek(offset)
        else:
            self.fo.seek(self.realpos + offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support. The wrapped object's read() method is called
        to manually seek to the desired position.

        offset -- read this number of bytes from the wrapped
                  file object.

        Raises RangeError if we encounter EOF before reaching the
        specified offset.
        """
        pos = 0
        bufsize = 1024
        while pos < offset:
            if (pos + bufsize) > offset:
                bufsize = offset - pos
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
| 214 | |||
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    This class handles Range headers exactly like an HTTP
    server would.
    """

    def open_local_file(self, req):
        """Open the local file named by req, honouring a Range header.

        Returns a urllib.addinfourl whose headers carry Content-Type,
        Content-Length (the length of the selected range when a range
        is given, the file size otherwise) and Last-modified.

        Raises urllib2.URLError if the URL names a host that is not the
        local host, and RangeError if the requested range does not fit
        inside the file.
        """
        import mimetypes
        import mimetools
        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')

        # Parse and validate the range *before* opening the file so that
        # a bad range cannot leave an open file handle behind.
        brange = range_header_to_tuple(req.headers.get('Range', None))
        assert brange != ()
        if brange:
            (fb, lb) = brange
            if lb == '':
                lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError('Requested Range Not Satisfiable')
            size = (lb - fb)

        fo = open(localfile, 'rb')
        if brange:
            fo = RangeableFileObject(fo, (fb, lb))
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        return urllib.addinfourl(fo, headers, 'file:' + selector)
| 249 | |||
| 250 | |||
| 251 | # FTP Range Support | ||
| 252 | # Unfortunately, a large amount of base FTP code had to be copied | ||
| 253 | # from urllib and urllib2 in order to insert the FTP REST command. | ||
| 254 | # Code modifications for range support have been commented as | ||
| 255 | # follows: | ||
| 256 | # -- range support modifications start/end here | ||
| 257 | |||
| 258 | from urllib import splitport, splituser, splitpasswd, splitattr, \ | ||
| 259 | unquote, addclosehook, addinfourl | ||
| 260 | import ftplib | ||
| 261 | import socket | ||
| 262 | import sys | ||
| 263 | import ftplib | ||
| 264 | import mimetypes | ||
| 265 | import mimetools | ||
| 266 | |||
| 267 | class FTPRangeHandler(urllib2.FTPHandler): | ||
| 268 | def ftp_open(self, req): | ||
| 269 | host = req.get_host() | ||
| 270 | if not host: | ||
| 271 | raise IOError, ('ftp error', 'no host given') | ||
| 272 | host, port = splitport(host) | ||
| 273 | if port is None: | ||
| 274 | port = ftplib.FTP_PORT | ||
| 275 | |||
| 276 | # username/password handling | ||
| 277 | user, host = splituser(host) | ||
| 278 | if user: | ||
| 279 | user, passwd = splitpasswd(user) | ||
| 280 | else: | ||
| 281 | passwd = None | ||
| 282 | host = unquote(host) | ||
| 283 | user = unquote(user or '') | ||
| 284 | passwd = unquote(passwd or '') | ||
| 285 | |||
| 286 | try: | ||
| 287 | host = socket.gethostbyname(host) | ||
| 288 | except socket.error, msg: | ||
| 289 | raise urllib2.URLError(msg) | ||
| 290 | path, attrs = splitattr(req.get_selector()) | ||
| 291 | dirs = path.split('/') | ||
| 292 | dirs = map(unquote, dirs) | ||
| 293 | dirs, file = dirs[:-1], dirs[-1] | ||
| 294 | if dirs and not dirs[0]: | ||
| 295 | dirs = dirs[1:] | ||
| 296 | try: | ||
| 297 | fw = self.connect_ftp(user, passwd, host, port, dirs) | ||
| 298 | type = file and 'I' or 'D' | ||
| 299 | for attr in attrs: | ||
| 300 | attr, value = splitattr(attr) | ||
| 301 | if attr.lower() == 'type' and \ | ||
| 302 | value in ('a', 'A', 'i', 'I', 'd', 'D'): | ||
| 303 | type = value.upper() | ||
| 304 | |||
| 305 | # -- range support modifications start here | ||
| 306 | rest = None | ||
| 307 | range_tup = range_header_to_tuple(req.headers.get('Range',None)) | ||
| 308 | assert range_tup != () | ||
| 309 | if range_tup: | ||
| 310 | (fb,lb) = range_tup | ||
| 311 | if fb > 0: rest = fb | ||
| 312 | # -- range support modifications end here | ||
| 313 | |||
| 314 | fp, retrlen = fw.retrfile(file, type, rest) | ||
| 315 | |||
| 316 | # -- range support modifications start here | ||
| 317 | if range_tup: | ||
| 318 | (fb,lb) = range_tup | ||
| 319 | if lb == '': | ||
| 320 | if retrlen is None or retrlen == 0: | ||
| 321 | raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.') | ||
| 322 | lb = retrlen | ||
| 323 | retrlen = lb - fb | ||
| 324 | if retrlen < 0: | ||
| 325 | # beginning of range is larger than file | ||
| 326 | raise RangeError('Requested Range Not Satisfiable') | ||
| 327 | else: | ||
| 328 | retrlen = lb - fb | ||
| 329 | fp = RangeableFileObject(fp, (0,retrlen)) | ||
| 330 | # -- range support modifications end here | ||
| 331 | |||
| 332 | headers = "" | ||
| 333 | mtype = mimetypes.guess_type(req.get_full_url())[0] | ||
| 334 | if mtype: | ||
| 335 | headers += "Content-Type: %s\n" % mtype | ||
| 336 | if retrlen is not None and retrlen >= 0: | ||
| 337 | headers += "Content-Length: %d\n" % retrlen | ||
| 338 | sf = StringIO(headers) | ||
| 339 | headers = mimetools.Message(sf) | ||
| 340 | return addinfourl(fp, headers, req.get_full_url()) | ||
| 341 | except ftplib.all_errors, msg: | ||
| 342 | raise IOError, ('ftp error', msg), sys.exc_info()[2] | ||
| 343 | |||
| 344 | def connect_ftp(self, user, passwd, host, port, dirs): | ||
| 345 | fw = ftpwrapper(user, passwd, host, port, dirs) | ||
| 346 | return fw | ||
| 347 | |||
| 348 | class ftpwrapper(urllib.ftpwrapper): | ||
| 349 | # range support note: | ||
| 350 | # this ftpwrapper code is copied directly from | ||
| 351 | # urllib. The only enhancement is to add the rest | ||
| 352 | # argument and pass it on to ftp.ntransfercmd | ||
| 353 | def retrfile(self, file, type, rest=None): | ||
| 354 | self.endtransfer() | ||
| 355 | if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 | ||
| 356 | else: cmd = 'TYPE ' + type; isdir = 0 | ||
| 357 | try: | ||
| 358 | self.ftp.voidcmd(cmd) | ||
| 359 | except ftplib.all_errors: | ||
| 360 | self.init() | ||
| 361 | self.ftp.voidcmd(cmd) | ||
| 362 | conn = None | ||
| 363 | if file and not isdir: | ||
| 364 | # Use nlst to see if the file exists at all | ||
| 365 | try: | ||
| 366 | self.ftp.nlst(file) | ||
| 367 | except ftplib.error_perm, reason: | ||
| 368 | raise IOError, ('ftp error', reason), sys.exc_info()[2] | ||
| 369 | # Restore the transfer mode! | ||
| 370 | self.ftp.voidcmd(cmd) | ||
| 371 | # Try to retrieve as a file | ||
| 372 | try: | ||
| 373 | cmd = 'RETR ' + file | ||
| 374 | conn = self.ftp.ntransfercmd(cmd, rest) | ||
| 375 | except ftplib.error_perm, reason: | ||
| 376 | if str(reason)[:3] == '501': | ||
| 377 | # workaround for REST not supported error | ||
| 378 | fp, retrlen = self.retrfile(file, type) | ||
| 379 | fp = RangeableFileObject(fp, (rest,'')) | ||
| 380 | return (fp, retrlen) | ||
| 381 | elif str(reason)[:3] != '550': | ||
| 382 | raise IOError, ('ftp error', reason), sys.exc_info()[2] | ||
| 383 | if not conn: | ||
| 384 | # Set transfer mode to ASCII! | ||
| 385 | self.ftp.voidcmd('TYPE A') | ||
| 386 | # Try a directory listing | ||
| 387 | if file: cmd = 'LIST ' + file | ||
| 388 | else: cmd = 'LIST' | ||
| 389 | conn = self.ftp.ntransfercmd(cmd) | ||
| 390 | self.busy = 1 | ||
| 391 | # Pass back both a suitably decorated object and a retrieval length | ||
| 392 | return (addclosehook(conn[0].makefile('rb'), | ||
| 393 | self.endtransfer), conn[1]) | ||
| 394 | |||
| 395 | |||
| 396 | #################################################################### | ||
| 397 | # Range Tuple Functions | ||
| 398 | # XXX: These range tuple functions might go better in a class. | ||
| 399 | |||
# Compiled lazily on the first call to range_header_to_tuple().
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
    function pulls the firstbyte and lastbyte values and returns
    a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
    the header value, it is returned as an empty string in the
    tuple. A lastbyte that is specified is returned incremented by
    one, i.e. the header's inclusive end becomes an exclusive end.

    Return None if range_header is None, or if the header selects the
    entire file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    Raise RangeError (from range_tuple_normalize) if lastbyte is
    smaller than firstbyte.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Compare against '' explicitly: a plain truth test would also skip
    # a last byte of 0, turning "bytes=0-0" into the empty range (0, 0)
    # instead of the one-byte range (0, 1).
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
| 427 | |||
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The result has the form "bytes=<firstbyte>-<lastbyte>". None is
    returned when no range is needed, i.e. when range_tup is None or
    normalizes to None.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        # the tuple's end is one past the header's inclusive last byte
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
| 439 | |||
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int. A first byte of None or '' is taken as 0; a last byte that
    is missing or None is taken as ''.

    Return None if range_tup is None or if the normalized tuple is
    (0,''), as that is equivalent to retrieving the entire file.

    Raise RangeError if the last byte is smaller than the first byte.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open end ('') is always valid
    # and must not be compared with an int
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
| 463 | |||
