summaryrefslogtreecommitdiffstats
path: root/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py')
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py463
1 files changed, 463 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
new file mode 100644
index 0000000000..001b4e32d6
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/byterange.py
@@ -0,0 +1,463 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $
21
22import os
23import stat
24import urllib
25import urllib2
26import rfc822
27
28DEBUG = None
29
30try:
31 from cStringIO import StringIO
32except ImportError, msg:
33 from StringIO import StringIO
34
class RangeError(IOError):
    """Signal that a requested byte range cannot be satisfied.

    Derives from IOError so that callers handling generic I/O failures
    also see range failures.
    """
38
class HTTPRangeHandler(urllib2.BaseHandler):
    """urllib2 handler that makes HTTP Range requests usable.

    Range is a native HTTP feature, so the only work needed is to tell
    urllib2 that a "206 Partial Content" response is a success rather
    than an error, and to translate a 416 response into RangeError.

    Example:
        import urllib2
        import byterange

        range_handler = byterange.HTTPRangeHandler()
        opener = urllib2.build_opener(range_handler)

        # install it
        urllib2.install_opener(opener)

        # create Request and set Range header
        req = urllib2.Request('http://www.python.org/')
        req.add_header('Range', 'bytes=30-50')
        f = urllib2.urlopen(req)
    """

    def http_error_206(self, req, fp, code, msg, hdrs):
        """Wrap a 206 Partial Content response as a normal result."""
        response = urllib.addinfourl(fp, hdrs, req.get_full_url())
        response.code, response.msg = code, msg
        return response

    def http_error_416(self, req, fp, code, msg, hdrs):
        """Report a 416 (Range Not Satisfiable) response as RangeError."""
        raise RangeError('Requested Range Not Satisfiable')
73
class HTTPSRangeHandler(HTTPRangeHandler):
    """Range header support for HTTPS.

    Both hooks simply delegate to the plain-HTTP implementations
    inherited from HTTPRangeHandler.
    """

    def https_error_206(self, req, fp, code, msg, hdrs):
        """Treat a 206 Partial Content response over HTTPS as success."""
        return self.http_error_206(req, fp, code, msg, hdrs)

    def https_error_416(self, req, fp, code, msg, hdrs):
        """Report a 416 response over HTTPS as RangeError.

        The previous body called https_error_416 on itself, which
        recursed until the interpreter's recursion limit was hit
        instead of raising RangeError.
        """
        return self.http_error_416(req, fp, code, msg, hdrs)
82
class RangeableFileObject:
    """File object wrapper to enable raw range handling.

    This was implemented primarily for handling range specifications
    for file:// urls.  The wrapper makes a file object look like it
    consists only of a range of bytes in the stream.

    The range is (firstbyte, lastbyte) with lastbyte exclusive; a
    lastbyte of '' means "up to the end of the wrapped object".

    Examples:
        # expose 10 bytes, starting at byte position 20, from
        # /etc/passwd.
        >>> fo = RangeableFileObject(open('/etc/passwd', 'rb'), (20,30))
        # seek seeks within the range (to position 23 in this case)
        >>> fo.seek(3)
        # tell tells where you are _within the range_ (position 3 in
        # this case)
        >>> fo.tell()
        # read EOFs if an attempt is made to read past the last
        # byte in the range. the following will return only 7 bytes.
        >>> fo.read(30)
    """

    def __init__(self, fo, rangetup):
        """Create a RangeableFileObject.

        fo       -- a file like object. only the read() method need be
                    supported but supporting an optimized seek() is
                    preferable.
        rangetup -- a (firstbyte,lastbyte) tuple specifying the range
                    to work over.

        The file object provided is assumed to be at byte offset 0.
        Raises RangeError if the range starts before byte 0 or if the
        wrapped object hits EOF before the start of the range.
        """
        self.fo = fo
        normalized = range_tuple_normalize(rangetup)
        if normalized is None:
            # range_tuple_normalize() collapses "the whole file" to None;
            # expand it again so the unpacking below cannot fail.
            normalized = (0, '')
        (self.firstbyte, self.lastbyte) = normalized
        if self.firstbyte < 0:
            raise RangeError('Requested Range Not Satisfiable')
        self.realpos = 0
        self._do_seek(self.firstbyte)

    def __getattr__(self, name):
        """Delegate unknown attributes (including methods) to self.fo.

        This effectively allows us to wrap at the instance level.
        """
        if name == 'fo':
            # self.fo itself is missing (instance not initialised yet);
            # looking it up again below would recurse forever.
            raise AttributeError(name)
        if hasattr(self.fo, name):
            return getattr(self.fo, name)
        raise AttributeError(name)

    def tell(self):
        """Return the position within the range.

        Position 0 is the first byte of the range tuple.  For example,
        if this object was created with a range tuple of (500,899),
        tell() will return 0 when at byte position 500 of the file.
        """
        return (self.realpos - self.firstbyte)

    def seek(self, offset, whence=0):
        """Seek within the byte range.

        Positioning is identical to that described under tell().
        whence 0 is relative to the start of the range, whence 1 is
        relative to the current position.  Seeking past the end of a
        bounded range is clamped to the end of the range.

        Raises IOError for whence 2 (seek from end), for a target
        before the start of the range, and for a backward seek when the
        wrapped object has no seek().  Raises ValueError for any other
        whence value.
        """
        if whence == 0:    # absolute seek
            realoffset = self.firstbyte + offset
        elif whence == 1:  # relative seek
            realoffset = self.realpos + offset
        elif whence == 2:  # absolute from end of file
            # XXX: are we raising the right Error here?
            raise IOError('seek from end of file not supported.')
        else:
            raise ValueError('invalid whence (%r, should be 0, 1 or 2)'
                             % (whence,))

        if realoffset < self.firstbyte:
            raise IOError('seek before start of range not supported.')

        # do not allow seek past lastbyte in range; compare against ''
        # explicitly because an end offset of 0 is a valid bound.
        if self.lastbyte != '' and realoffset >= self.lastbyte:
            realoffset = self.lastbyte

        self._do_seek(realoffset - self.realpos)

    def read(self, size=-1):
        """Read within the range.

        The amount read is limited so that it never goes past the end
        of the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.read(size)
        self.realpos += len(rslt)
        return rslt

    def readline(self, size=-1):
        """Read a line within the range.

        The amount read is limited so that it never goes past the end
        of the range.
        """
        size = self._calc_read_size(size)
        rslt = self.fo.readline(size)
        self.realpos += len(rslt)
        return rslt

    def _calc_read_size(self, size):
        """Return the number of bytes that may be read for a request
        of `size` bytes (negative or None meaning "everything").
        """
        if self.lastbyte == '':
            # unbounded range: pass the request through untouched
            return size
        remaining = self.lastbyte - self.realpos
        if size is None or size < 0 or size >= remaining:
            return remaining
        return size

    def _do_seek(self, offset):
        """Move `offset` bytes relative to the current position.

        Uses the wrapped object's seek() when it has one, otherwise
        reads and discards data.  Raises IOError for a negative offset
        when the wrapped object cannot seek.
        """
        if hasattr(self.fo, 'seek'):
            self.fo.seek(self.realpos + offset)
        elif offset < 0:
            raise IOError('backward seek not supported by wrapped object.')
        else:
            self._poor_mans_seek(offset)
        self.realpos += offset

    def _poor_mans_seek(self, offset):
        """Seek by calling the wrapped file object's read() method.

        This is used for file like objects that do not have native
        seek support.
        offset -- read (and discard) this number of bytes from the
                  wrapped file object.
        Raises RangeError if EOF is encountered before reaching the
        specified offset.
        """
        pos = 0
        while pos < offset:
            bufsize = min(1024, offset - pos)
            buf = self.fo.read(bufsize)
            if len(buf) != bufsize:
                raise RangeError('Requested Range Not Satisfiable')
            pos += bufsize
214
class FileRangeHandler(urllib2.FileHandler):
    """FileHandler subclass that adds Range support.

    A Range request header on a file:// request is applied to the
    local file; the response carries Content-Type, Content-Length
    and Last-modified headers.
    """

    def open_local_file(self, req):
        """Open the local file named by `req`, honouring its Range header.

        Returns a urllib.addinfourl object.  When a Range header is
        present the returned file object is a RangeableFileObject and
        Content-Length is the length of the range.

        Raises urllib2.URLError if the URL names a non-local host and
        RangeError if the requested range does not fit the file.
        """
        import mimetypes
        import mimetools
        # imported locally so this method does not depend on the
        # module-level import that appears further down the file
        import socket

        host = req.get_host()
        selector = req.get_selector()
        localfile = urllib.url2pathname(selector)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(selector)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError('file not on local host')
        raw = open(localfile, 'rb')
        fo = raw
        try:
            brange = range_header_to_tuple(req.headers.get('Range', None))
            assert brange != ()
            if brange:
                (fb, lb) = brange
                if lb == '':
                    lb = size
                if fb < 0 or fb > size or lb > size:
                    raise RangeError('Requested Range Not Satisfiable')
                size = (lb - fb)
                fo = RangeableFileObject(raw, (fb, lb))
        except Exception:
            # do not leak the open file when the range is rejected
            raw.close()
            raise
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        return urllib.addinfourl(fo, headers, 'file:' + selector)
249
250
251# FTP Range Support
252# Unfortunately, a large amount of base FTP code had to be copied
253# from urllib and urllib2 in order to insert the FTP REST command.
254# Code modifications for range support have been commented as
255# follows:
256# -- range support modifications start/end here
257
258from urllib import splitport, splituser, splitpasswd, splitattr, \
259 unquote, addclosehook, addinfourl
260import ftplib
261import socket
262import sys
263import ftplib
264import mimetypes
265import mimetools
266
267class FTPRangeHandler(urllib2.FTPHandler):
268 def ftp_open(self, req):
269 host = req.get_host()
270 if not host:
271 raise IOError, ('ftp error', 'no host given')
272 host, port = splitport(host)
273 if port is None:
274 port = ftplib.FTP_PORT
275
276 # username/password handling
277 user, host = splituser(host)
278 if user:
279 user, passwd = splitpasswd(user)
280 else:
281 passwd = None
282 host = unquote(host)
283 user = unquote(user or '')
284 passwd = unquote(passwd or '')
285
286 try:
287 host = socket.gethostbyname(host)
288 except socket.error, msg:
289 raise urllib2.URLError(msg)
290 path, attrs = splitattr(req.get_selector())
291 dirs = path.split('/')
292 dirs = map(unquote, dirs)
293 dirs, file = dirs[:-1], dirs[-1]
294 if dirs and not dirs[0]:
295 dirs = dirs[1:]
296 try:
297 fw = self.connect_ftp(user, passwd, host, port, dirs)
298 type = file and 'I' or 'D'
299 for attr in attrs:
300 attr, value = splitattr(attr)
301 if attr.lower() == 'type' and \
302 value in ('a', 'A', 'i', 'I', 'd', 'D'):
303 type = value.upper()
304
305 # -- range support modifications start here
306 rest = None
307 range_tup = range_header_to_tuple(req.headers.get('Range',None))
308 assert range_tup != ()
309 if range_tup:
310 (fb,lb) = range_tup
311 if fb > 0: rest = fb
312 # -- range support modifications end here
313
314 fp, retrlen = fw.retrfile(file, type, rest)
315
316 # -- range support modifications start here
317 if range_tup:
318 (fb,lb) = range_tup
319 if lb == '':
320 if retrlen is None or retrlen == 0:
321 raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
322 lb = retrlen
323 retrlen = lb - fb
324 if retrlen < 0:
325 # beginning of range is larger than file
326 raise RangeError('Requested Range Not Satisfiable')
327 else:
328 retrlen = lb - fb
329 fp = RangeableFileObject(fp, (0,retrlen))
330 # -- range support modifications end here
331
332 headers = ""
333 mtype = mimetypes.guess_type(req.get_full_url())[0]
334 if mtype:
335 headers += "Content-Type: %s\n" % mtype
336 if retrlen is not None and retrlen >= 0:
337 headers += "Content-Length: %d\n" % retrlen
338 sf = StringIO(headers)
339 headers = mimetools.Message(sf)
340 return addinfourl(fp, headers, req.get_full_url())
341 except ftplib.all_errors, msg:
342 raise IOError, ('ftp error', msg), sys.exc_info()[2]
343
344 def connect_ftp(self, user, passwd, host, port, dirs):
345 fw = ftpwrapper(user, passwd, host, port, dirs)
346 return fw
347
348class ftpwrapper(urllib.ftpwrapper):
349 # range support note:
350 # this ftpwrapper code is copied directly from
351 # urllib. The only enhancement is to add the rest
352 # argument and pass it on to ftp.ntransfercmd
353 def retrfile(self, file, type, rest=None):
354 self.endtransfer()
355 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
356 else: cmd = 'TYPE ' + type; isdir = 0
357 try:
358 self.ftp.voidcmd(cmd)
359 except ftplib.all_errors:
360 self.init()
361 self.ftp.voidcmd(cmd)
362 conn = None
363 if file and not isdir:
364 # Use nlst to see if the file exists at all
365 try:
366 self.ftp.nlst(file)
367 except ftplib.error_perm, reason:
368 raise IOError, ('ftp error', reason), sys.exc_info()[2]
369 # Restore the transfer mode!
370 self.ftp.voidcmd(cmd)
371 # Try to retrieve as a file
372 try:
373 cmd = 'RETR ' + file
374 conn = self.ftp.ntransfercmd(cmd, rest)
375 except ftplib.error_perm, reason:
376 if str(reason)[:3] == '501':
377 # workaround for REST not supported error
378 fp, retrlen = self.retrfile(file, type)
379 fp = RangeableFileObject(fp, (rest,''))
380 return (fp, retrlen)
381 elif str(reason)[:3] != '550':
382 raise IOError, ('ftp error', reason), sys.exc_info()[2]
383 if not conn:
384 # Set transfer mode to ASCII!
385 self.ftp.voidcmd('TYPE A')
386 # Try a directory listing
387 if file: cmd = 'LIST ' + file
388 else: cmd = 'LIST'
389 conn = self.ftp.ntransfercmd(cmd)
390 self.busy = 1
391 # Pass back both a suitably decorated object and a retrieval length
392 return (addclosehook(conn[0].makefile('rb'),
393 self.endtransfer), conn[1])
394
395
396####################################################################
397# Range Tuple Functions
398# XXX: These range tuple functions might go better in a class.
399
# Compiled lazily on first use by range_header_to_tuple().
_rangere = None
def range_header_to_tuple(range_header):
    """Get a (firstbyte,lastbyte) tuple from a Range header value.

    Range headers have the form "bytes=<firstbyte>-<lastbyte>" where
    lastbyte is inclusive.  The returned tuple uses an exclusive
    lastbyte (header value + 1).  If lastbyte is not specified in the
    header value, it is returned as an empty string in the tuple.

    Return None if range_header is None, or if the header asks for
    the entire file ("bytes=0-").
    Return () if range_header does not conform to the range spec
    pattern.
    Raises RangeError (from range_tuple_normalize) when lastbyte is
    smaller than firstbyte.
    """
    global _rangere
    if range_header is None:
        return None
    if _rangere is None:
        import re
        _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
    match = _rangere.match(range_header)
    if not match:
        return ()
    tup = range_tuple_normalize(match.group(1, 2))
    # Compare against '' rather than testing truthiness: a last byte
    # of 0 ("bytes=0-0", the first byte only) must also be converted
    # to an exclusive end.
    if tup and tup[1] != '':
        tup = (tup[0], tup[1] + 1)
    return tup
427
def range_tuple_to_header(range_tup):
    """Build a Range header value from a range tuple.

    The tuple is normalized first.  The result has the form
    "bytes=<firstbyte>-<lastbyte>", with a non-empty, non-zero
    lastbyte reduced by one.  None is returned when range_tup is None
    or normalizes to "the entire file", i.e. when no header is needed.
    """
    if range_tup is None:
        return None
    normalized = range_tuple_normalize(range_tup)
    if not normalized:
        return None
    first, last = normalized
    if last:
        last = last - 1
    return 'bytes=%s-%s' % (first, last)
439
def range_tuple_normalize(range_tup):
    """Normalize a (first_byte,last_byte) range tuple.

    Return a tuple whose first element is guaranteed to be an int
    and whose second element will be '' (meaning: the last byte) or
    an int.  A first byte of None or '' is taken as 0; a missing or
    None last byte is taken as ''.

    Return None if range_tup is None or if the normalized tuple is
    (0,''), as that is equivalent to retrieving the entire file.
    Raises RangeError if the last byte is smaller than the first.
    """
    if range_tup is None:
        return None
    # handle first byte
    fb = range_tup[0]
    if fb in (None, ''):
        fb = 0
    else:
        fb = int(fb)
    # handle last byte
    try:
        lb = range_tup[1]
    except IndexError:
        lb = ''
    else:
        if lb is None:
            lb = ''
        elif lb != '':
            lb = int(lb)
    # check if range is over the entire file
    if (fb, lb) == (0, ''):
        return None
    # check that the range is valid; an open-ended range ('') has no
    # upper bound to check, and comparing '' with an int only worked
    # through Python 2's arbitrary mixed-type ordering.
    if lb != '' and lb < fb:
        raise RangeError('Invalid byte range: %s-%s' % (fb, lb))
    return (fb, lb)
463