summaryrefslogtreecommitdiffstats
path: root/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py')
-rw-r--r--scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py617
1 files changed, 617 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
new file mode 100644
index 0000000000..71393e2b8d
--- /dev/null
+++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py
@@ -0,0 +1,617 @@
1# This library is free software; you can redistribute it and/or
2# modify it under the terms of the GNU Lesser General Public
3# License as published by the Free Software Foundation; either
4# version 2.1 of the License, or (at your option) any later version.
5#
6# This library is distributed in the hope that it will be useful,
7# but WITHOUT ANY WARRANTY; without even the implied warranty of
8# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9# Lesser General Public License for more details.
10#
11# You should have received a copy of the GNU Lesser General Public
12# License along with this library; if not, write to the
13# Free Software Foundation, Inc.,
14# 59 Temple Place, Suite 330,
15# Boston, MA 02111-1307 USA
16
17# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
21
22>>> import urllib2
23>>> from keepalive import HTTPHandler
24>>> keepalive_handler = HTTPHandler()
25>>> opener = urllib2.build_opener(keepalive_handler)
26>>> urllib2.install_opener(opener)
27>>>
28>>> fo = urllib2.urlopen('http://www.python.org')
29
30If a connection to a given host is requested, and all of the existing
31connections are still in use, another connection will be opened. If
32the handler tries to use an existing connection but it fails in some
33way, it will be closed and removed from the pool.
34
35To remove the handler, simply re-run build_opener with no arguments, and
36install that opener.
37
38You can explicitly close connections by using the close_connection()
39method of the returned file-like object (described below) or you can
40use the handler methods:
41
42 close_connection(host)
43 close_all()
44 open_connections()
45
46NOTE: using the close_connection and close_all methods of the handler
47should be done with care when using multiple threads.
48 * there is nothing that prevents another thread from creating new
49 connections immediately after connections are closed
50 * no checks are done to prevent in-use connections from being closed
51
52>>> keepalive_handler.close_all()
53
54EXTRA ATTRIBUTES AND METHODS
55
56 Upon a status of 200, the object returned has a few additional
57 attributes and methods, which should not be used if you want to
58 remain consistent with the normal urllib2-returned objects:
59
60 close_connection() - close the connection to the host
61 readlines() - you know, readlines()
62 status - the return status (ie 404)
 63 reason - English translation of status (ie 'File not found')
64
65 If you want the best of both worlds, use this inside an
66 AttributeError-catching try:
67
68 >>> try: status = fo.status
69 >>> except AttributeError: status = None
70
71 Unfortunately, these are ONLY there if status == 200, so it's not
72 easy to distinguish between non-200 responses. The reason is that
73 urllib2 tries to do clever things with error codes 301, 302, 401,
74 and 407, and it wraps the object upon return.
75
76 For python versions earlier than 2.4, you can avoid this fancy error
77 handling by setting the module-level global HANDLE_ERRORS to zero.
78 You see, prior to 2.4, it's the HTTP Handler's job to determine what
79 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
80 means "pass everything up". In python 2.4, however, this job no
81 longer belongs to the HTTP Handler and is now done by a NEW handler,
82 HTTPErrorProcessor. Here's the bottom line:
83
84 python version < 2.4
85 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
86 errors
87 HANDLE_ERRORS == 0 pass everything up, error processing is
88 left to the calling code
89 python version >= 2.4
90 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
91 HANDLE_ERRORS == 0 (default) pass everything up, let the
92 other handlers (specifically,
93 HTTPErrorProcessor) decide what to do
94
95 In practice, setting the variable either way makes little difference
96 in python 2.4, so for the most consistent behavior across versions,
97 you probably just want to use the defaults, which will give you
98 exceptions on errors.
99
100"""
101
102# $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $
103
104import urllib2
105import httplib
106import socket
107import thread
108
109DEBUG = None
110
111import sslfactory
112
import sys
# Before Python 2.4 there was no HTTPErrorProcessor, so the HTTP handler
# itself had to turn non-200 responses into errors: default HANDLE_ERRORS
# on.  From 2.4 onwards error processing is delegated to other handlers,
# so default it off.  See the module docstring for the full discussion.
if sys.version_info < (2, 4): HANDLE_ERRORS = 1
else: HANDLE_ERRORS = 0
116
class ConnectionManager:
    """Thread-safe registry of persistent connections.

    Tracks every open connection, which host it belongs to, and whether
    it is ready (idle) or currently serving a request.
    """
    def __init__(self):
        self._lock = thread.allocate_lock()
        self._hostmap = {}  # map hosts to a list of connections
        self._connmap = {}  # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register a new connection for host with the given ready state."""
        self._lock.acquire()
        try:
            # 'in' replaces dict.has_key(), which was removed in Python 3
            if host not in self._hostmap: self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        """Forget a connection; unknown connections are silently ignored."""
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                # drop the host entry once its last connection is gone
                if not self._hostmap[host]: del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        """Mark a connection as ready (idle) or busy.

        Deliberately lockless: the single dict store is assumed atomic
        under CPython's GIL, and a connection concurrently removed by
        another thread is simply ignored (KeyError swallowed).
        """
        try: self._readymap[connection] = ready
        except KeyError: pass

    def get_ready_conn(self, host):
        """Return an idle connection to host (marking it busy), or None."""
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        """Return a copy of host's connection list, or of the whole map."""
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
176
177class KeepAliveHandler:
178 def __init__(self):
179 self._cm = ConnectionManager()
180
181 #### Connection Management
182 def open_connections(self):
183 """return a list of connected hosts and the number of connections
184 to each. [('foo.com:80', 2), ('bar.org', 1)]"""
185 return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
186
187 def close_connection(self, host):
188 """close connection(s) to <host>
189 host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
190 no error occurs if there is no connection to that host."""
191 for h in self._cm.get_all(host):
192 self._cm.remove(h)
193 h.close()
194
195 def close_all(self):
196 """close all open connections"""
197 for host, conns in self._cm.get_all().items():
198 for h in conns:
199 self._cm.remove(h)
200 h.close()
201
202 def _request_closed(self, request, host, connection):
203 """tells us that this request is now closed and the the
204 connection is ready for another request"""
205 self._cm.set_ready(connection, 1)
206
207 def _remove_connection(self, host, connection, close=0):
208 if close: connection.close()
209 self._cm.remove(connection)
210
211 #### Transaction Execution
212 def do_open(self, req):
213 host = req.get_host()
214 if not host:
215 raise urllib2.URLError('no host given')
216
217 try:
218 h = self._cm.get_ready_conn(host)
219 while h:
220 r = self._reuse_connection(h, req, host)
221
222 # if this response is non-None, then it worked and we're
223 # done. Break out, skipping the else block.
224 if r: break
225
226 # connection is bad - possibly closed by server
227 # discard it and ask for the next free connection
228 h.close()
229 self._cm.remove(h)
230 h = self._cm.get_ready_conn(host)
231 else:
232 # no (working) free connections were found. Create a new one.
233 h = self._get_connection(host)
234 if DEBUG: DEBUG.info("creating new connection to %s (%d)",
235 host, id(h))
236 self._cm.add(host, h, 0)
237 self._start_transaction(h, req)
238 r = h.getresponse()
239 except (socket.error, httplib.HTTPException), err:
240 raise urllib2.URLError(err)
241
242 # if not a persistent connection, don't try to reuse it
243 if r.will_close: self._cm.remove(h)
244
245 if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
246 r._handler = self
247 r._host = host
248 r._url = req.get_full_url()
249 r._connection = h
250 r.code = r.status
251 r.headers = r.msg
252 r.msg = r.reason
253
254 if r.status == 200 or not HANDLE_ERRORS:
255 return r
256 else:
257 return self.parent.error('http', req, r,
258 r.status, r.msg, r.headers)
259
260 def _reuse_connection(self, h, req, host):
261 """start the transaction with a re-used connection
262 return a response object (r) upon success or None on failure.
263 This DOES not close or remove bad connections in cases where
264 it returns. However, if an unexpected exception occurs, it
265 will close and remove the connection before re-raising.
266 """
267 try:
268 self._start_transaction(h, req)
269 r = h.getresponse()
270 # note: just because we got something back doesn't mean it
271 # worked. We'll check the version below, too.
272 except (socket.error, httplib.HTTPException):
273 r = None
274 except:
275 # adding this block just in case we've missed
276 # something we will still raise the exception, but
277 # lets try and close the connection and remove it
278 # first. We previously got into a nasty loop
279 # where an exception was uncaught, and so the
280 # connection stayed open. On the next try, the
281 # same exception was raised, etc. The tradeoff is
282 # that it's now possible this call will raise
283 # a DIFFERENT exception
284 if DEBUG: DEBUG.error("unexpected exception - closing " + \
285 "connection to %s (%d)", host, id(h))
286 self._cm.remove(h)
287 h.close()
288 raise
289
290 if r is None or r.version == 9:
291 # httplib falls back to assuming HTTP 0.9 if it gets a
292 # bad header back. This is most likely to happen if
293 # the socket has been closed by the server since we
294 # last used the connection.
295 if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
296 host, id(h))
297 r = None
298 else:
299 if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
300
301 return r
302
303 def _start_transaction(self, h, req):
304 try:
305 if req.has_data():
306 data = req.get_data()
307 h.putrequest('POST', req.get_selector())
308 if not req.headers.has_key('Content-type'):
309 h.putheader('Content-type',
310 'application/x-www-form-urlencoded')
311 if not req.headers.has_key('Content-length'):
312 h.putheader('Content-length', '%d' % len(data))
313 else:
314 h.putrequest('GET', req.get_selector())
315 except (socket.error, httplib.HTTPException), err:
316 raise urllib2.URLError(err)
317
318 for args in self.parent.addheaders:
319 h.putheader(*args)
320 for k, v in req.headers.items():
321 h.putheader(k, v)
322 h.endheaders()
323 if req.has_data():
324 h.send(data)
325
326 def _get_connection(self, host):
327 return NotImplementedError
328
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    """urllib2 HTTP handler that keeps connections alive between requests."""

    def __init__(self):
        KeepAliveHandler.__init__(self)

    def http_open(self, req):
        # delegate to the shared keep-alive machinery
        return self.do_open(req)

    def _get_connection(self, host):
        # plain (non-SSL) connection using our buffered response class
        return HTTPConnection(host)
338
class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
    """urllib2 HTTPS handler with keep-alive support."""

    def __init__(self, ssl_factory=None):
        KeepAliveHandler.__init__(self)
        # fall back to the default factory when no (truthy) factory is
        # supplied -- same truthiness test as before, just condensed
        self._ssl_factory = ssl_factory or sslfactory.get_factory()

    def https_open(self, req):
        # delegate to the shared keep-alive machinery
        return self.do_open(req)

    def _get_connection(self, host):
        # SSL-capable connection produced by the configured factory
        return self._ssl_factory.get_https_connection(host)
351
class HTTPResponse(httplib.HTTPResponse):
    """httplib.HTTPResponse subclass adding buffered line-oriented reads.

    we need to subclass HTTPResponse in order to
    1) add readline() and readlines() methods
    2) add close_connection() method
    3) add info() and geturl() methods
    """

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        if method: # the httplib in python 2.3 uses the method arg
            httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        else: # 2.2 doesn't
            httplib.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''          # readline buffer; read() drains it first
        self._rbufsize = 8096    # chunk size used when refilling the buffer
        self._handler = None     # inserted by the handler later
        self._host = None        # (same)
        self._url = None         # (same)
        self._connection = None  # (same)

    # keep a handle on the unbuffered parent read
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and mark the connection ready for reuse."""
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the response AND remove/close the pooled connection."""
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        """Read up to amt bytes (or to EOF), draining the buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:  # idiom fix: was 'not amt is None'
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    def readline(self, limit=-1):
        """Read one line (or up to limit bytes), buffering as needed."""
        data = ""
        i = self._rbuf.find('\n')
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new: break
            i = new.find('\n')
            if i >= 0: i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0: i = len(self._rbuf)
        else: i = i+1
        if 0 <= limit < len(self._rbuf): i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint=0):
        """Read lines until EOF (or about sizehint bytes); return a list."""
        total = 0
        lines = []  # renamed: the original shadowed the builtin 'list'
        while 1:
            line = self.readline()
            if not line: break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
448
449
class HTTPConnection(httplib.HTTPConnection):
    """httplib.HTTPConnection wired to the buffered response class above."""
    # use the modified response class
    response_class = HTTPResponse
453
class HTTPSConnection(httplib.HTTPSConnection):
    """httplib.HTTPSConnection wired to the buffered response class above."""
    # use the modified response class
    response_class = HTTPResponse
456
457#########################################################################
458##### TEST FUNCTIONS
459#########################################################################
460
461def error_handler(url):
462 global HANDLE_ERRORS
463 orig = HANDLE_ERRORS
464 keepalive_handler = HTTPHandler()
465 opener = urllib2.build_opener(keepalive_handler)
466 urllib2.install_opener(opener)
467 pos = {0: 'off', 1: 'on'}
468 for i in (0, 1):
469 print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
470 HANDLE_ERRORS = i
471 try:
472 fo = urllib2.urlopen(url)
473 foo = fo.read()
474 fo.close()
475 try: status, reason = fo.status, fo.reason
476 except AttributeError: status, reason = None, None
477 except IOError, e:
478 print " EXCEPTION: %s" % e
479 raise
480 else:
481 print " status = %s, reason = %s" % (status, reason)
482 HANDLE_ERRORS = orig
483 hosts = keepalive_handler.open_connections()
484 print "open connections:", hosts
485 keepalive_handler.close_all()
486
487def continuity(url):
488 import md5
489 format = '%25s: %s'
490
491 # first fetch the file with the normal http handler
492 opener = urllib2.build_opener()
493 urllib2.install_opener(opener)
494 fo = urllib2.urlopen(url)
495 foo = fo.read()
496 fo.close()
497 m = md5.new(foo)
498 print format % ('normal urllib', m.hexdigest())
499
500 # now install the keepalive handler and try again
501 opener = urllib2.build_opener(HTTPHandler())
502 urllib2.install_opener(opener)
503
504 fo = urllib2.urlopen(url)
505 foo = fo.read()
506 fo.close()
507 m = md5.new(foo)
508 print format % ('keepalive read', m.hexdigest())
509
510 fo = urllib2.urlopen(url)
511 foo = ''
512 while 1:
513 f = fo.readline()
514 if f: foo = foo + f
515 else: break
516 fo.close()
517 m = md5.new(foo)
518 print format % ('keepalive readline', m.hexdigest())
519
520def comp(N, url):
521 print ' making %i connections to:\n %s' % (N, url)
522
523 sys.stdout.write(' first using the normal urllib handlers')
524 # first use normal opener
525 opener = urllib2.build_opener()
526 urllib2.install_opener(opener)
527 t1 = fetch(N, url)
528 print ' TIME: %.3f s' % t1
529
530 sys.stdout.write(' now using the keepalive handler ')
531 # now install the keepalive handler and try again
532 opener = urllib2.build_opener(HTTPHandler())
533 urllib2.install_opener(opener)
534 t2 = fetch(N, url)
535 print ' TIME: %.3f s' % t2
536 print ' improvement factor: %.2f' % (t1/t2, )
537
538def fetch(N, url, delay=0):
539 import time
540 lens = []
541 starttime = time.time()
542 for i in range(N):
543 if delay and i > 0: time.sleep(delay)
544 fo = urllib2.urlopen(url)
545 foo = fo.read()
546 fo.close()
547 lens.append(len(foo))
548 diff = time.time() - starttime
549
550 j = 0
551 for i in lens[1:]:
552 j = j + 1
553 if not i == lens[0]:
554 print "WARNING: inconsistent length on read %i: %i" % (j, i)
555
556 return diff
557
558def test_timeout(url):
559 global DEBUG
560 dbbackup = DEBUG
561 class FakeLogger:
562 def debug(self, msg, *args): print msg % args
563 info = warning = error = debug
564 DEBUG = FakeLogger()
565 print " fetching the file to establish a connection"
566 fo = urllib2.urlopen(url)
567 data1 = fo.read()
568 fo.close()
569
570 i = 20
571 print " waiting %i seconds for the server to close the connection" % i
572 while i > 0:
573 sys.stdout.write('\r %2i' % i)
574 sys.stdout.flush()
575 time.sleep(1)
576 i -= 1
577 sys.stderr.write('\r')
578
579 print " fetching the file a second time"
580 fo = urllib2.urlopen(url)
581 data2 = fo.read()
582 fo.close()
583
584 if data1 == data2:
585 print ' data are identical'
586 else:
587 print ' ERROR: DATA DIFFER'
588
589 DEBUG = dbbackup
590
591
592def test(url, N=10):
593 print "checking error hander (do this on a non-200)"
594 try: error_handler(url)
595 except IOError, e:
596 print "exiting - exception will prevent further tests"
597 sys.exit()
598 print
599 print "performing continuity test (making sure stuff isn't corrupted)"
600 continuity(url)
601 print
602 print "performing speed comparison"
603 comp(N, url)
604 print
605 print "performing dropped-connection check"
606 test_timeout(url)
607
608if __name__ == '__main__':
609 import time
610 import sys
611 try:
612 N = int(sys.argv[1])
613 url = sys.argv[2]
614 except:
615 print "%s <integer> <url>" % sys.argv[0]
616 else:
617 test(url, N)