diff options
Diffstat (limited to 'scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py')
| -rw-r--r-- | scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py | 617 |
1 files changed, 617 insertions, 0 deletions
diff --git a/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py new file mode 100644 index 0000000000..71393e2b8d --- /dev/null +++ b/scripts/lib/mic/3rdparty/pykickstart/urlgrabber/keepalive.py | |||
| @@ -0,0 +1,617 @@ | |||
| 1 | # This library is free software; you can redistribute it and/or | ||
| 2 | # modify it under the terms of the GNU Lesser General Public | ||
| 3 | # License as published by the Free Software Foundation; either | ||
| 4 | # version 2.1 of the License, or (at your option) any later version. | ||
| 5 | # | ||
| 6 | # This library is distributed in the hope that it will be useful, | ||
| 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 9 | # Lesser General Public License for more details. | ||
| 10 | # | ||
| 11 | # You should have received a copy of the GNU Lesser General Public | ||
| 12 | # License along with this library; if not, write to the | ||
| 13 | # Free Software Foundation, Inc., | ||
| 14 | # 59 Temple Place, Suite 330, | ||
| 15 | # Boston, MA 02111-1307 USA | ||
| 16 | |||
| 17 | # This file is part of urlgrabber, a high-level cross-protocol url-grabber | ||
| 18 | # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko | ||
| 19 | |||
| 20 | """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive. | ||
| 21 | |||
| 22 | >>> import urllib2 | ||
| 23 | >>> from keepalive import HTTPHandler | ||
| 24 | >>> keepalive_handler = HTTPHandler() | ||
| 25 | >>> opener = urllib2.build_opener(keepalive_handler) | ||
| 26 | >>> urllib2.install_opener(opener) | ||
| 27 | >>> | ||
| 28 | >>> fo = urllib2.urlopen('http://www.python.org') | ||
| 29 | |||
| 30 | If a connection to a given host is requested, and all of the existing | ||
| 31 | connections are still in use, another connection will be opened. If | ||
| 32 | the handler tries to use an existing connection but it fails in some | ||
| 33 | way, it will be closed and removed from the pool. | ||
| 34 | |||
| 35 | To remove the handler, simply re-run build_opener with no arguments, and | ||
| 36 | install that opener. | ||
| 37 | |||
| 38 | You can explicitly close connections by using the close_connection() | ||
| 39 | method of the returned file-like object (described below) or you can | ||
| 40 | use the handler methods: | ||
| 41 | |||
| 42 | close_connection(host) | ||
| 43 | close_all() | ||
| 44 | open_connections() | ||
| 45 | |||
| 46 | NOTE: using the close_connection and close_all methods of the handler | ||
| 47 | should be done with care when using multiple threads. | ||
| 48 | * there is nothing that prevents another thread from creating new | ||
| 49 | connections immediately after connections are closed | ||
| 50 | * no checks are done to prevent in-use connections from being closed | ||
| 51 | |||
| 52 | >>> keepalive_handler.close_all() | ||
| 53 | |||
| 54 | EXTRA ATTRIBUTES AND METHODS | ||
| 55 | |||
| 56 | Upon a status of 200, the object returned has a few additional | ||
| 57 | attributes and methods, which should not be used if you want to | ||
| 58 | remain consistent with the normal urllib2-returned objects: | ||
| 59 | |||
| 60 | close_connection() - close the connection to the host | ||
| 61 | readlines() - you know, readlines() | ||
| 62 | status - the return status (e.g. 404) | ||
| 63 | reason - English translation of status (e.g. 'File not found') | ||
| 64 | |||
| 65 | If you want the best of both worlds, use this inside an | ||
| 66 | AttributeError-catching try: | ||
| 67 | |||
| 68 | >>> try: status = fo.status | ||
| 69 | >>> except AttributeError: status = None | ||
| 70 | |||
| 71 | Unfortunately, these are ONLY there if status == 200, so it's not | ||
| 72 | easy to distinguish between non-200 responses. The reason is that | ||
| 73 | urllib2 tries to do clever things with error codes 301, 302, 401, | ||
| 74 | and 407, and it wraps the object upon return. | ||
| 75 | |||
| 76 | For python versions earlier than 2.4, you can avoid this fancy error | ||
| 77 | handling by setting the module-level global HANDLE_ERRORS to zero. | ||
| 78 | You see, prior to 2.4, it's the HTTP Handler's job to determine what | ||
| 79 | to handle specially, and what to just pass up. HANDLE_ERRORS == 0 | ||
| 80 | means "pass everything up". In python 2.4, however, this job no | ||
| 81 | longer belongs to the HTTP Handler and is now done by a NEW handler, | ||
| 82 | HTTPErrorProcessor. Here's the bottom line: | ||
| 83 | |||
| 84 | python version < 2.4 | ||
| 85 | HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as | ||
| 86 | errors | ||
| 87 | HANDLE_ERRORS == 0 pass everything up, error processing is | ||
| 88 | left to the calling code | ||
| 89 | python version >= 2.4 | ||
| 90 | HANDLE_ERRORS == 1 pass up 200, treat the rest as errors | ||
| 91 | HANDLE_ERRORS == 0 (default) pass everything up, let the | ||
| 92 | other handlers (specifically, | ||
| 93 | HTTPErrorProcessor) decide what to do | ||
| 94 | |||
| 95 | In practice, setting the variable either way makes little difference | ||
| 96 | in python 2.4, so for the most consistent behavior across versions, | ||
| 97 | you probably just want to use the defaults, which will give you | ||
| 98 | exceptions on errors. | ||
| 99 | |||
| 100 | """ | ||
| 101 | |||
| 102 | # $Id: keepalive.py,v 1.16 2006/09/22 00:58:05 mstenner Exp $ | ||
| 103 | |||
| 104 | import urllib2 | ||
| 105 | import httplib | ||
| 106 | import socket | ||
| 107 | import thread | ||
| 108 | |||
# Optional debug logger.  While it is None nothing is logged; code in
# this module calls DEBUG.info(...) and DEBUG.error(...) with
# printf-style arguments whenever it is set to a truthy object.
DEBUG = None
| 110 | |||
| 111 | import sslfactory | ||
| 112 | |||
| 113 | import sys | ||
# Default error-handling mode (see the module docstring): from python
# 2.4 on everything is passed up (0); older interpreters treat every
# non-200 status as an error (1).
if sys.version_info >= (2, 4):
    HANDLE_ERRORS = 0
else:
    HANDLE_ERRORS = 1
| 116 | |||
class ConnectionManager:
    """Registry of the connections opened to each host.

    The connection manager must be able to:
      * keep track of all existing connections, grouped by host
      * remember for each connection whether it is ready for a new
        request or currently in use
      * hand out a ready connection for a host, marking it in use

    Every method takes the internal lock while it touches the maps.
    """
    def __init__(self):
        self._lock = thread.allocate_lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        """Register connection under host with the given ready state."""
        self._lock.acquire()
        try:
            if host not in self._hostmap: self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        """Forget connection.  Unknown connections are ignored."""
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                # drop the host entry once its last connection is gone
                if not self._hostmap[host]: del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        """Update the ready state of a connection that is still tracked.

        Connections that were never added, or that have already been
        removed, are ignored.  A plain dict assignment would silently
        re-insert them into the ready map and keep them alive forever.
        """
        self._lock.acquire()
        try:
            if connection in self._readymap:
                self._readymap[connection] = ready
        finally:
            self._lock.release()

    def get_ready_conn(self, host):
        """Return a ready connection to host, marked as in use, or None."""
        conn = None
        self._lock.acquire()
        try:
            for c in self._hostmap.get(host, []):
                if self._readymap[c]:
                    self._readymap[c] = 0
                    conn = c
                    break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        """Return a snapshot of the tracked connections.

        With a host, return a list of that host's connections (empty if
        there are none).  Without one, return a dict mapping every host
        to a list of its connections.  The returned containers are
        copies, so callers may remove connections while iterating them.
        """
        self._lock.acquire()
        try:
            if host:
                return list(self._hostmap.get(host, []))
            else:
                return dict([(h, list(conns))
                             for (h, conns) in self._hostmap.items()])
        finally:
            self._lock.release()
| 176 | |||
class KeepAliveHandler:
    """Mixin that runs urllib2 requests over a pool of reusable connections.

    Subclasses must provide _get_connection(host), which returns a new
    connection object for host.  The handler also reads self.parent
    (its addheaders list and its error() method); that attribute is not
    set here -- presumably urllib2 assigns it when the handler is
    installed in an opener.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each. [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for conns in self._cm.get_all().values():
            # iterate over a copy: remove() edits the manager's own
            # per-host list, and mutating a list while looping over it
            # skips every other element
            for h in list(conns):
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """drop connection from the pool, closing it first if close is
        true"""
        if close: connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def do_open(self, req):
        """Send req over a pooled connection and return the response.

        Ready connections to the host are tried first; each one that
        fails is closed and discarded.  If none works, a new connection
        is created and added to the pool.

        Raises urllib2.URLError if the request has no host or if the
        transaction fails with socket.error / httplib.HTTPException.
        Returns the response when its status is 200 or HANDLE_ERRORS is
        false; otherwise returns whatever self.parent.error() returns.
        """
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r: break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = self._get_connection(host)
                if DEBUG: DEBUG.info("creating new connection to %s (%d)",
                                     host, id(h))
                self._cm.add(host, h, 0)
                try:
                    self._start_transaction(h, req)
                    r = h.getresponse()
                except:
                    # the connection was registered as busy above; if
                    # its first transaction fails, nothing would ever
                    # mark it ready or remove it, so drop it here
                    # before passing the exception on
                    self._remove_connection(host, h, close=1)
                    raise
        except (socket.error, httplib.HTTPException) as err:
            raise urllib2.URLError(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close: self._cm.remove(h)

        if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # back-references used by the response's close(),
        # close_connection() and geturl()
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        # the header object must be saved before msg is overwritten
        # with the reason text
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG: DEBUG.error("unexpected exception - closing "
                                  "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
                                 host, id(h))
            r = None
        else:
            if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """Write the request line, headers and optional body of req to h.

        A request with data is sent as POST (default Content-type and
        Content-length headers are added when the request does not set
        them); anything else is sent as GET.  Errors raised while
        writing the request line and those default headers are turned
        into urllib2.URLError; later errors propagate unchanged.
        """
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector())
                if 'Content-type' not in req.headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'Content-length' not in req.headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector())
        except (socket.error, httplib.HTTPException) as err:
            raise urllib2.URLError(err)

        for args in self.parent.addheaders:
            h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)

    def _get_connection(self, host):
        """Return a new connection to host.  Subclasses must override."""
        raise NotImplementedError
| 328 | |||
class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
    """urllib2 handler for http requests that goes through the
    keepalive connection pool.

    Only KeepAliveHandler.__init__ is run on construction.
    """

    def __init__(self):
        KeepAliveHandler.__init__(self)

    def _get_connection(self, host):
        # plain-HTTP connections use this module's HTTPConnection
        return HTTPConnection(host)

    def http_open(self, req):
        # hand the request to the shared keepalive implementation
        return self.do_open(req)
| 338 | |||
class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
    """urllib2 handler for https requests that goes through the
    keepalive connection pool.

    ssl_factory supplies the connections through its
    get_https_connection(host) method; when none is given (or it is
    false), sslfactory.get_factory() is used.
    """

    def __init__(self, ssl_factory=None):
        KeepAliveHandler.__init__(self)
        self._ssl_factory = ssl_factory or sslfactory.get_factory()

    def _get_connection(self, host):
        # the factory decides which connection object to build
        return self._ssl_factory.get_https_connection(host)

    def https_open(self, req):
        # hand the request to the shared keepalive implementation
        return self.do_open(req)
| 351 | |||
class HTTPResponse(httplib.HTTPResponse):
    """httplib response with line-oriented reads and pool awareness.

    The handler fills in _handler, _host, _url and _connection after
    the response has been received; close() uses them to tell the
    handler that the connection can take another request.
    """
    # we need to subclass HTTPResponse in order to
    # 1) add readline() and readlines() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        if method: # the httplib in python 2.3 uses the method arg
            # strict has to be passed along explicitly: it sits between
            # debuglevel and method in this signature, so leaving it
            # out would hand the method string to the base class as
            # its third positional argument
            httplib.HTTPResponse.__init__(self, sock, debuglevel,
                                          strict, method)
        else: # 2.2 doesn't
            httplib.HTTPResponse.__init__(self, sock, debuglevel)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''          # data read ahead by readline()
        self._rbufsize = 8096    # chunk size used by readline()
        self._handler = None # inserted by the handler later
        self._host = None    # (same)
        self._url = None     # (same)
        self._connection = None # (same)

    # unbuffered read of the base class
    _raw_read = httplib.HTTPResponse.read

    def close(self):
        """Close the response and report the connection as reusable.

        The handler is only notified the first time, while self.fp is
        still set.
        """
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        """Close the underlying connection, remove it from the pool and
        close this response."""
        # a response that no handler has claimed has no pool entry
        if self._handler:
            self._handler._remove_connection(self._host, self._connection,
                                             close=1)
        self.close()

    def info(self):
        """Return the response headers (set by the handler)."""
        return self.headers

    def geturl(self):
        """Return the URL recorded by the handler for this response."""
        return self._url

    def read(self, amt=None):
        """Read up to amt bytes (everything if amt is None), serving
        data left in the readline buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                # the buffer covers only part of the request
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s

    def readline(self, limit=-1):
        """Return the next line including its newline, or '' at the end.

        A non-negative limit caps the length of the returned string; a
        line shorter than limit is still returned on its own.
        """
        i = self._rbuf.find('\n')
        # fill the buffer until it holds a newline or at least limit
        # bytes, or until the data runs out
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new: break
            i = new.find('\n')
            if i >= 0: i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0: i = len(self._rbuf)
        else: i = i+1
        # cut at limit only when the line is longer than limit;
        # comparing against the buffer length instead would return
        # data from beyond the first newline
        if 0 <= limit < i: i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data

    def readlines(self, sizehint = 0):
        """Return a list of the remaining lines.  If sizehint is given,
        stop once at least that many bytes have been collected."""
        total = 0
        lines = []
        while 1:
            line = self.readline()
            if not line: break
            lines.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return lines
| 448 | |||
| 449 | |||
class HTTPConnection(httplib.HTTPConnection):
    """httplib.HTTPConnection whose responses are instances of this
    module's HTTPResponse."""
    # use the modified response class
    response_class = HTTPResponse
| 453 | |||
class HTTPSConnection(httplib.HTTPSConnection):
    """httplib.HTTPSConnection whose responses are instances of this
    module's HTTPResponse."""
    # use the modified response class
    response_class = HTTPResponse
| 456 | |||
| 457 | ######################################################################### | ||
| 458 | ##### TEST FUNCTIONS | ||
| 459 | ######################################################################### | ||
| 460 | |||
def error_handler(url):
    """Fetch url once with HANDLE_ERRORS off and once with it on.

    The status and reason of each fetch are printed.  An IOError from
    a fetch is printed and re-raised.  The module-level HANDLE_ERRORS
    flag is put back to its previous value in either case.
    """
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = urllib2.build_opener(keepalive_handler)
    urllib2.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    try:
        for i in (0, 1):
            print(" fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i))
            HANDLE_ERRORS = i
            try:
                fo = urllib2.urlopen(url)
                fo.read()
                fo.close()
                # status and reason only exist on keepalive responses
                try: status, reason = fo.status, fo.reason
                except AttributeError: status, reason = None, None
            except IOError as e:
                print(" EXCEPTION: %s" % e)
                raise
            else:
                print(" status = %s, reason = %s" % (status, reason))
    finally:
        # restore the flag even when the fetch raised, so that a
        # caller that catches the error is not left in test mode
        HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print("open connections: %s" % (hosts,))
    keepalive_handler.close_all()
| 486 | |||
def continuity(url):
    """Print md5 digests of url fetched three ways, for comparison.

    The file is read with the stock urllib2 opener, with the keepalive
    handler using read(), and with the keepalive handler using
    readline().  Matching digests mean the data was not corrupted.
    """
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # interpreters without hashlib only have the md5 module
        from md5 import new as new_md5
    fmt = '%25s: %s'

    def report(label, data):
        # one output line per fetch method
        print(fmt % (label, new_md5(data).hexdigest()))

    # first fetch the file with the normal http handler
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    report('normal urllib', foo)

    # now install the keepalive handler and try again
    opener = urllib2.build_opener(HTTPHandler())
    urllib2.install_opener(opener)

    fo = urllib2.urlopen(url)
    foo = fo.read()
    fo.close()
    report('keepalive read', foo)

    # and once more, line by line
    fo = urllib2.urlopen(url)
    pieces = []
    while 1:
        f = fo.readline()
        if not f: break
        pieces.append(f)
    fo.close()
    report('keepalive readline', ''.join(pieces))
| 519 | |||
def comp(N, url):
    """Time N fetches of url with the stock opener and with the
    keepalive handler, and print both times and their ratio."""
    print(' making %i connections to:\n %s' % (N, url))

    # baseline: the handlers urllib2 installs by default
    sys.stdout.write(' first using the normal urllib handlers')
    urllib2.install_opener(urllib2.build_opener())
    baseline = fetch(N, url)
    print(' TIME: %.3f s' % baseline)

    # same fetches through the keepalive handler
    sys.stdout.write(' now using the keepalive handler ')
    urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    pooled = fetch(N, url)
    print(' TIME: %.3f s' % pooled)
    print(' improvement factor: %.2f' % (baseline / pooled))
| 537 | |||
def fetch(N, url, delay=0):
    """Fetch url N times through the installed opener.

    Sleeps delay seconds between fetches when delay is non-zero, warns
    about every read whose length differs from the first one, and
    returns the elapsed wall-clock time in seconds.
    """
    import time
    sizes = []
    began = time.time()
    for attempt in range(N):
        # no pause before the very first request
        if delay and attempt > 0: time.sleep(delay)
        response = urllib2.urlopen(url)
        body = response.read()
        response.close()
        sizes.append(len(body))
    elapsed = time.time() - began

    # every later read is compared against the first one
    position = 0
    for size in sizes[1:]:
        position += 1
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (position, size))

    return elapsed
| 557 | |||
def test_timeout(url):
    """Fetch url, wait 20 seconds, fetch it again and compare the data.

    The wait gives the server time to drop the idle connection, so the
    second fetch exercises the handling of a dead pooled connection.
    While the test runs, DEBUG is replaced by a logger that prints
    every message; the previous value is restored afterwards.
    """
    global DEBUG
    # imported here because the module only imports time when it is
    # run as a script
    import time

    dbbackup = DEBUG
    class FakeLogger:
        def debug(self, msg, *args): print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    try:
        print(" fetching the file to establish a connection")
        fo = urllib2.urlopen(url)
        data1 = fo.read()
        fo.close()

        i = 20
        print(" waiting %i seconds for the server to close the connection" % i)
        while i > 0:
            sys.stdout.write('\r %2i' % i)
            sys.stdout.flush()
            time.sleep(1)
            i -= 1
        sys.stderr.write('\r')

        print(" fetching the file a second time")
        fo = urllib2.urlopen(url)
        data2 = fo.read()
        fo.close()

        if data1 == data2:
            print(' data are identical')
        else:
            print(' ERROR: DATA DIFFER')
    finally:
        # put the real logger back even if a fetch failed
        DEBUG = dbbackup
| 590 | |||
| 591 | |||
def test(url, N=10):
    """Run the whole self-test suite against url.

    The error-handler check comes first; if it raises IOError the
    process exits, because the remaining tests would hit the same
    error.  The speed comparison makes N fetches per opener.
    """
    print("checking error hander (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()

    remaining = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    )
    for banner, run in remaining:
        print(banner)
        run()
| 607 | |||
if __name__ == '__main__':
    # usage: keepalive.py <integer> <url>
    # time is imported at module level here because the test functions
    # look it up as a global
    import time
    import sys
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # missing argument or non-numeric count: show the usage line.
        # Anything else (e.g. KeyboardInterrupt) is left to propagate.
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)
