1209 lines
		
	
	
		
			50 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1209 lines
		
	
	
		
			50 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import generators
 | |
| """
 | |
| httplib2
 | |
| 
 | |
| A caching http interface that supports ETags and gzip
 | |
| to conserve bandwidth.
 | |
| 
 | |
| Requires Python 2.3 or later
 | |
| 
 | |
| Changelog:
 | |
| 2007-08-18, Rick: Modified so it's able to use a socks proxy if needed.
 | |
| 
 | |
| """
 | |
| 
 | |
| __author__ = "Joe Gregorio (joe@bitworking.org)"
 | |
| __copyright__ = "Copyright 2006, Joe Gregorio"
 | |
| __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
 | |
|     "James Antill",
 | |
|     "Xavier Verges Farrero",
 | |
|     "Jonathan Feinberg",
 | |
|     "Blair Zajac",
 | |
|     "Sam Ruby",
 | |
|     "Louis Nyffenegger"]
 | |
| __license__ = "MIT"
 | |
| __version__ = "$Rev$"
 | |
| 
 | |
| import re
 | |
| import sys
 | |
| import email
 | |
| import email.Utils
 | |
| import email.Message
 | |
| import email.FeedParser
 | |
| import StringIO
 | |
| import gzip
 | |
| import zlib
 | |
| import httplib
 | |
| import urlparse
 | |
| import base64
 | |
| import os
 | |
| import copy
 | |
| import calendar
 | |
| import time
 | |
| import random
 | |
| import errno
 | |
| # remove depracated warning in python2.6
 | |
| try:
 | |
|     from hashlib import sha1 as _sha, md5 as _md5
 | |
| except ImportError:
 | |
|     import sha
 | |
|     import md5
 | |
|     _sha = sha.new
 | |
|     _md5 = md5.new
 | |
| import hmac
 | |
| from gettext import gettext as _
 | |
| import socket
 | |
| 
 | |
| try:
 | |
|   from httplib2 import socks
 | |
| except ImportError:
 | |
|   socks = None
 | |
| 
 | |
| # Build the appropriate socket wrapper for ssl
 | |
| try:
 | |
|     import ssl # python 2.6
 | |
|     _ssl_wrap_socket = ssl.wrap_socket
 | |
| except ImportError:
 | |
|     def _ssl_wrap_socket(sock, key_file, cert_file):
 | |
|         ssl_sock = socket.ssl(sock, key_file, cert_file)
 | |
|         return httplib.FakeSocket(sock, ssl_sock)
 | |
| 
 | |
| 
 | |
| if sys.version_info >= (2,3):
 | |
|     from iri2uri import iri2uri
 | |
| else:
 | |
|     def iri2uri(uri):
 | |
|         return uri
 | |
| 
 | |
| def has_timeout(timeout): # python 2.6
 | |
|     if hasattr(socket, '_GLOBAL_DEFAULT_TIMEOUT'):
 | |
|         return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT)
 | |
|     return (timeout is not None)
 | |
| 
 | |
| __all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error',
 | |
|   'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
 | |
|   'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',
 | |
|   'debuglevel']
 | |
| 
 | |
| 
 | |
| # The httplib debug level, set to a non-zero value to get debug output
 | |
| debuglevel = 0
 | |
| 
 | |
| 
 | |
| # Python 2.3 support
 | |
| if sys.version_info < (2,4):
 | |
|     def sorted(seq):
 | |
|         seq.sort()
 | |
|         return seq
 | |
| 
 | |
| # Python 2.3 support
 | |
| def HTTPResponse__getheaders(self):
 | |
|     """Return list of (header, value) tuples."""
 | |
|     if self.msg is None:
 | |
|         raise httplib.ResponseNotReady()
 | |
|     return self.msg.items()
 | |
| 
 | |
| if not hasattr(httplib.HTTPResponse, 'getheaders'):
 | |
|     httplib.HTTPResponse.getheaders = HTTPResponse__getheaders
 | |
| 
 | |
| # All exceptions raised here derive from HttpLib2Error
 | |
| class HttpLib2Error(Exception): pass
 | |
| 
 | |
| # Some exceptions can be caught and optionally
 | |
| # be turned back into responses.
 | |
| class HttpLib2ErrorWithResponse(HttpLib2Error):
 | |
|     def __init__(self, desc, response, content):
 | |
|         self.response = response
 | |
|         self.content = content
 | |
|         HttpLib2Error.__init__(self, desc)
 | |
| 
 | |
| class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass
 | |
| class RedirectLimit(HttpLib2ErrorWithResponse): pass
 | |
| class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass
 | |
| class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
 | |
| class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass
 | |
| 
 | |
| class RelativeURIError(HttpLib2Error): pass
 | |
| class ServerNotFoundError(HttpLib2Error): pass
 | |
| 
 | |
| # Open Items:
 | |
| # -----------
 | |
| # Proxy support
 | |
| 
 | |
| # Are we removing the cached content too soon on PUT (only delete on 200 Maybe?)
 | |
| 
 | |
| # Pluggable cache storage (supports storing the cache in
 | |
| #   flat files by default. We need a plug-in architecture
 | |
| #   that can support Berkeley DB and Squid)
 | |
| 
 | |
| # == Known Issues ==
 | |
| # Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
 | |
| # Does not handle Cache-Control: max-stale
 | |
| # Does not use Age: headers when calculating cache freshness.
 | |
| 
 | |
| 
 | |
| # The number of redirections to follow before giving up.
 | |
| # Note that only GET redirects are automatically followed.
 | |
| # Will also honor 301 requests by saving that info and never
 | |
| # requesting that URI again.
 | |
| DEFAULT_MAX_REDIRECTS = 5
 | |
| 
 | |
| # Which headers are hop-by-hop headers by default
 | |
| HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade']
 | |
| 
 | |
| def _get_end2end_headers(response):
 | |
|     hopbyhop = list(HOP_BY_HOP)
 | |
|     hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')])
 | |
|     return [header for header in response.keys() if header not in hopbyhop]
 | |
| 
 | |
| URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
 | |
| 
 | |
| def parse_uri(uri):
 | |
|     """Parses a URI using the regex given in Appendix B of RFC 3986.
 | |
| 
 | |
|         (scheme, authority, path, query, fragment) = parse_uri(uri)
 | |
|     """
 | |
|     groups = URI.match(uri).groups()
 | |
|     return (groups[1], groups[3], groups[4], groups[6], groups[8])
 | |
| 
 | |
| def urlnorm(uri):
 | |
|     (scheme, authority, path, query, fragment) = parse_uri(uri)
 | |
|     if not scheme or not authority:
 | |
|         raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
 | |
|     authority = authority.lower()
 | |
|     scheme = scheme.lower()
 | |
|     if not path:
 | |
|         path = "/"
 | |
|     # Could do syntax based normalization of the URI before
 | |
|     # computing the digest. See Section 6.2.2 of Std 66.
 | |
|     request_uri = query and "?".join([path, query]) or path
 | |
|     scheme = scheme.lower()
 | |
|     defrag_uri = scheme + "://" + authority + request_uri
 | |
|     return scheme, authority, request_uri, defrag_uri
 | |
| 
 | |
| 
 | |
| # Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
 | |
| re_url_scheme    = re.compile(r'^\w+://')
 | |
| re_slash         = re.compile(r'[?/:|]+')
 | |
| 
 | |
| def safename(filename):
 | |
|     """Return a filename suitable for the cache.
 | |
| 
 | |
|     Strips dangerous and common characters to create a filename we
 | |
|     can use to store the cache in.
 | |
|     """
 | |
| 
 | |
|     try:
 | |
|         if re_url_scheme.match(filename):
 | |
|             if isinstance(filename,str):
 | |
|                 filename = filename.decode('utf-8')
 | |
|                 filename = filename.encode('idna')
 | |
|             else:
 | |
|                 filename = filename.encode('idna')
 | |
|     except UnicodeError:
 | |
|         pass
 | |
|     if isinstance(filename,unicode):
 | |
|         filename=filename.encode('utf-8')
 | |
|     filemd5 = _md5(filename).hexdigest()
 | |
|     filename = re_url_scheme.sub("", filename)
 | |
|     filename = re_slash.sub(",", filename)
 | |
| 
 | |
|     # limit length of filename
 | |
|     if len(filename)>200:
 | |
|         filename=filename[:200]
 | |
|     return ",".join((filename, filemd5))
 | |
| 
 | |
| NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+')
 | |
| def _normalize_headers(headers):
 | |
|     return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip())  for (key, value) in headers.iteritems()])
 | |
| 
 | |
| def _parse_cache_control(headers):
 | |
|     retval = {}
 | |
|     if headers.has_key('cache-control'):
 | |
|         parts =  headers['cache-control'].split(',')
 | |
|         parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")]
 | |
|         parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
 | |
|         retval = dict(parts_with_args + parts_wo_args)
 | |
|     return retval
 | |
| 
 | |
| # Whether to use a strict mode to parse WWW-Authenticate headers
 | |
| # Might lead to bad results in case of ill-formed header value,
 | |
| # so disabled by default, falling back to relaxed parsing.
 | |
| # Set to true to turn on, usefull for testing servers.
 | |
| USE_WWW_AUTH_STRICT_PARSING = 0
 | |
| 
 | |
| # In regex below:
 | |
| #    [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+             matches a "token" as defined by HTTP
 | |
| #    "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?"    matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space
 | |
| # Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both:
 | |
| #    \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?
 | |
| WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$")
 | |
| WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$")
 | |
| UNQUOTE_PAIRS = re.compile(r'\\(.)')
 | |
| def _parse_www_authenticate(headers, headername='www-authenticate'):
 | |
|     """Returns a dictionary of dictionaries, one dict
 | |
|     per auth_scheme."""
 | |
|     retval = {}
 | |
|     if headers.has_key(headername):
 | |
|         authenticate = headers[headername].strip()
 | |
|         www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED
 | |
|         while authenticate:
 | |
|             # Break off the scheme at the beginning of the line
 | |
|             if headername == 'authentication-info':
 | |
|                 (auth_scheme, the_rest) = ('digest', authenticate)
 | |
|             else:
 | |
|                 (auth_scheme, the_rest) = authenticate.split(" ", 1)
 | |
|             # Now loop over all the key value pairs that come after the scheme,
 | |
|             # being careful not to roll into the next scheme
 | |
|             match = www_auth.search(the_rest)
 | |
|             auth_params = {}
 | |
|             while match:
 | |
|                 if match and len(match.groups()) == 3:
 | |
|                     (key, value, the_rest) = match.groups()
 | |
|                     auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')])
 | |
|                 match = www_auth.search(the_rest)
 | |
|             retval[auth_scheme.lower()] = auth_params
 | |
|             authenticate = the_rest.strip()
 | |
|     return retval
 | |
| 
 | |
| 
 | |
| def _entry_disposition(response_headers, request_headers):
 | |
|     """Determine freshness from the Date, Expires and Cache-Control headers.
 | |
| 
 | |
|     We don't handle the following:
 | |
| 
 | |
|     1. Cache-Control: max-stale
 | |
|     2. Age: headers are not used in the calculations.
 | |
| 
 | |
|     Not that this algorithm is simpler than you might think
 | |
|     because we are operating as a private (non-shared) cache.
 | |
|     This lets us ignore 's-maxage'. We can also ignore
 | |
|     'proxy-invalidate' since we aren't a proxy.
 | |
|     We will never return a stale document as
 | |
|     fresh as a design decision, and thus the non-implementation
 | |
|     of 'max-stale'. This also lets us safely ignore 'must-revalidate'
 | |
|     since we operate as if every server has sent 'must-revalidate'.
 | |
|     Since we are private we get to ignore both 'public' and
 | |
|     'private' parameters. We also ignore 'no-transform' since
 | |
|     we don't do any transformations.
 | |
|     The 'no-store' parameter is handled at a higher level.
 | |
|     So the only Cache-Control parameters we look at are:
 | |
| 
 | |
|     no-cache
 | |
|     only-if-cached
 | |
|     max-age
 | |
|     min-fresh
 | |
|     """
 | |
| 
 | |
|     retval = "STALE"
 | |
|     cc = _parse_cache_control(request_headers)
 | |
|     cc_response = _parse_cache_control(response_headers)
 | |
| 
 | |
|     if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1:
 | |
|         retval = "TRANSPARENT"
 | |
|         if 'cache-control' not in request_headers:
 | |
|             request_headers['cache-control'] = 'no-cache'
 | |
|     elif cc.has_key('no-cache'):
 | |
|         retval = "TRANSPARENT"
 | |
|     elif cc_response.has_key('no-cache'):
 | |
|         retval = "STALE"
 | |
|     elif cc.has_key('only-if-cached'):
 | |
|         retval = "FRESH"
 | |
|     elif response_headers.has_key('date'):
 | |
|         date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date']))
 | |
|         now = time.time()
 | |
|         current_age = max(0, now - date)
 | |
|         if cc_response.has_key('max-age'):
 | |
|             try:
 | |
|                 freshness_lifetime = int(cc_response['max-age'])
 | |
|             except ValueError:
 | |
|                 freshness_lifetime = 0
 | |
|         elif response_headers.has_key('expires'):
 | |
|             expires = email.Utils.parsedate_tz(response_headers['expires'])
 | |
|             if None == expires:
 | |
|                 freshness_lifetime = 0
 | |
|             else:
 | |
|                 freshness_lifetime = max(0, calendar.timegm(expires) - date)
 | |
|         else:
 | |
|             freshness_lifetime = 0
 | |
|         if cc.has_key('max-age'):
 | |
|             try:
 | |
|                 freshness_lifetime = int(cc['max-age'])
 | |
|             except ValueError:
 | |
|                 freshness_lifetime = 0
 | |
|         if cc.has_key('min-fresh'):
 | |
|             try:
 | |
|                 min_fresh = int(cc['min-fresh'])
 | |
|             except ValueError:
 | |
|                 min_fresh = 0
 | |
|             current_age += min_fresh
 | |
|         if freshness_lifetime > current_age:
 | |
|             retval = "FRESH"
 | |
|     return retval
 | |
| 
 | |
| def _decompressContent(response, new_content):
 | |
|     content = new_content
 | |
|     try:
 | |
|         encoding = response.get('content-encoding', None)
 | |
|         if encoding in ['gzip', 'deflate']:
 | |
|             if encoding == 'gzip':
 | |
|                 content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read()
 | |
|             if encoding == 'deflate':
 | |
|                 content = zlib.decompress(content)
 | |
|             response['content-length'] = str(len(content))
 | |
|             # Record the historical presence of the encoding in a way the won't interfere.
 | |
|             response['-content-encoding'] = response['content-encoding']
 | |
|             del response['content-encoding']
 | |
|     except IOError:
 | |
|         content = ""
 | |
|         raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content)
 | |
|     return content
 | |
| 
 | |
| def _updateCache(request_headers, response_headers, content, cache, cachekey):
 | |
|     if cachekey:
 | |
|         cc = _parse_cache_control(request_headers)
 | |
|         cc_response = _parse_cache_control(response_headers)
 | |
|         if cc.has_key('no-store') or cc_response.has_key('no-store'):
 | |
|             cache.delete(cachekey)
 | |
|         else:
 | |
|             info = email.Message.Message()
 | |
|             for key, value in response_headers.iteritems():
 | |
|                 if key not in ['status','content-encoding','transfer-encoding']:
 | |
|                     info[key] = value
 | |
| 
 | |
|             # Add annotations to the cache to indicate what headers
 | |
|             # are variant for this request.
 | |
|             vary = response_headers.get('vary', None)
 | |
|             if vary:
 | |
|                 vary_headers = vary.lower().replace(' ', '').split(',')
 | |
|                 for header in vary_headers:
 | |
|                     key = '-varied-%s' % header
 | |
|                     try:
 | |
|                         info[key] = request_headers[header]
 | |
|                     except KeyError:
 | |
|                         pass
 | |
| 
 | |
|             status = response_headers.status
 | |
|             if status == 304:
 | |
|                 status = 200
 | |
| 
 | |
|             status_header = 'status: %d\r\n' % response_headers.status
 | |
| 
 | |
|             header_str = info.as_string()
 | |
| 
 | |
|             header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
 | |
|             text = "".join([status_header, header_str, content])
 | |
| 
 | |
|             cache.set(cachekey, text)
 | |
| 
 | |
| def _cnonce():
 | |
|     dig = _md5("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest()
 | |
|     return dig[:16]
 | |
| 
 | |
| def _wsse_username_token(cnonce, iso_now, password):
 | |
|     return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip()
 | |
| 
 | |
| 
 | |
| # For credentials we need two things, first
 | |
| # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.)
 | |
| # Then we also need a list of URIs that have already demanded authentication
 | |
| # That list is tricky since sub-URIs can take the same auth, or the
 | |
| # auth scheme may change as you descend the tree.
 | |
| # So we also need each Auth instance to be able to tell us
 | |
| # how close to the 'top' it is.
 | |
| 
 | |
| class Authentication(object):
 | |
|     def __init__(self, credentials, host, request_uri, headers, response, content, http):
 | |
|         (scheme, authority, path, query, fragment) = parse_uri(request_uri)
 | |
|         self.path = path
 | |
|         self.host = host
 | |
|         self.credentials = credentials
 | |
|         self.http = http
 | |
| 
 | |
|     def depth(self, request_uri):
 | |
|         (scheme, authority, path, query, fragment) = parse_uri(request_uri)
 | |
|         return request_uri[len(self.path):].count("/")
 | |
| 
 | |
|     def inscope(self, host, request_uri):
 | |
|         # XXX Should we normalize the request_uri?
 | |
|         (scheme, authority, path, query, fragment) = parse_uri(request_uri)
 | |
|         return (host == self.host) and path.startswith(self.path)
 | |
| 
 | |
|     def request(self, method, request_uri, headers, content):
 | |
|         """Modify the request headers to add the appropriate
 | |
|         Authorization header. Over-rise this in sub-classes."""
 | |
|         pass
 | |
| 
 | |
|     def response(self, response, content):
 | |
|         """Gives us a chance to update with new nonces
 | |
|         or such returned from the last authorized response.
 | |
|         Over-rise this in sub-classes if necessary.
 | |
| 
 | |
|         Return TRUE is the request is to be retried, for
 | |
|         example Digest may return stale=true.
 | |
|         """
 | |
|         return False
 | |
| 
 | |
| 
 | |
| 
 | |
| class BasicAuthentication(Authentication):
 | |
|     def __init__(self, credentials, host, request_uri, headers, response, content, http):
 | |
|         Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
 | |
| 
 | |
|     def request(self, method, request_uri, headers, content):
 | |
|         """Modify the request headers to add the appropriate
 | |
|         Authorization header."""
 | |
|         headers['authorization'] = 'Basic ' + base64.b64encode("%s:%s" % self.credentials).strip()
 | |
| 
 | |
| 
 | |
| class DigestAuthentication(Authentication):
 | |
|     """Only do qop='auth' and MD5, since that
 | |
|     is all Apache currently implements"""
 | |
|     def __init__(self, credentials, host, request_uri, headers, response, content, http):
 | |
|         Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
 | |
|         challenge = _parse_www_authenticate(response, 'www-authenticate')
 | |
|         self.challenge = challenge['digest']
 | |
|         qop = self.challenge.get('qop', 'auth')
 | |
|         self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None
 | |
|         if self.challenge['qop'] is None:
 | |
|             raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop))
 | |
|         self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper()
 | |
|         if self.challenge['algorithm'] != 'MD5':
 | |
|             raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
 | |
|         self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]])
 | |
|         self.challenge['nc'] = 1
 | |
| 
 | |
|     def request(self, method, request_uri, headers, content, cnonce = None):
 | |
|         """Modify the request headers"""
 | |
|         H = lambda x: _md5(x).hexdigest()
 | |
|         KD = lambda s, d: H("%s:%s" % (s, d))
 | |
|         A2 = "".join([method, ":", request_uri])
 | |
|         self.challenge['cnonce'] = cnonce or _cnonce()
 | |
|         request_digest  = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'],
 | |
|                     '%08x' % self.challenge['nc'],
 | |
|                     self.challenge['cnonce'],
 | |
|                     self.challenge['qop'], H(A2)
 | |
|                     ))
 | |
|         headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % (
 | |
|                 self.credentials[0],
 | |
|                 self.challenge['realm'],
 | |
|                 self.challenge['nonce'],
 | |
|                 request_uri,
 | |
|                 self.challenge['algorithm'],
 | |
|                 request_digest,
 | |
|                 self.challenge['qop'],
 | |
|                 self.challenge['nc'],
 | |
|                 self.challenge['cnonce'],
 | |
|                 )
 | |
|         self.challenge['nc'] += 1
 | |
| 
 | |
|     def response(self, response, content):
 | |
|         if not response.has_key('authentication-info'):
 | |
|             challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
 | |
|             if 'true' == challenge.get('stale'):
 | |
|                 self.challenge['nonce'] = challenge['nonce']
 | |
|                 self.challenge['nc'] = 1
 | |
|                 return True
 | |
|         else:
 | |
|             updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})
 | |
| 
 | |
|             if updated_challenge.has_key('nextnonce'):
 | |
|                 self.challenge['nonce'] = updated_challenge['nextnonce']
 | |
|                 self.challenge['nc'] = 1
 | |
|         return False
 | |
| 
 | |
| 
 | |
| class HmacDigestAuthentication(Authentication):
 | |
|     """Adapted from Robert Sayre's code and DigestAuthentication above."""
 | |
|     __author__ = "Thomas Broyer (t.broyer@ltgt.net)"
 | |
| 
 | |
|     def __init__(self, credentials, host, request_uri, headers, response, content, http):
 | |
|         Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
 | |
|         challenge = _parse_www_authenticate(response, 'www-authenticate')
 | |
|         self.challenge = challenge['hmacdigest']
 | |
|         # TODO: self.challenge['domain']
 | |
|         self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
 | |
|         if self.challenge['reason'] not in ['unauthorized', 'integrity']:
 | |
|             self.challenge['reason'] = 'unauthorized'
 | |
|         self.challenge['salt'] = self.challenge.get('salt', '')
 | |
|         if not self.challenge.get('snonce'):
 | |
|             raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty."))
 | |
|         self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1')
 | |
|         if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']:
 | |
|             raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm']))
 | |
|         self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1')
 | |
|         if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']:
 | |
|             raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm']))
 | |
|         if self.challenge['algorithm'] == 'HMAC-MD5':
 | |
|             self.hashmod = _md5
 | |
|         else:
 | |
|             self.hashmod = _sha
 | |
|         if self.challenge['pw-algorithm'] == 'MD5':
 | |
|             self.pwhashmod = _md5
 | |
|         else:
 | |
|             self.pwhashmod = _sha
 | |
|         self.key = "".join([self.credentials[0], ":",
 | |
|                     self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
 | |
|                     ":", self.challenge['realm']
 | |
|                     ])
 | |
|         self.key = self.pwhashmod.new(self.key).hexdigest().lower()
 | |
| 
 | |
|     def request(self, method, request_uri, headers, content):
 | |
|         """Modify the request headers"""
 | |
|         keys = _get_end2end_headers(headers)
 | |
|         keylist = "".join(["%s " % k for k in keys])
 | |
|         headers_val = "".join([headers[k] for k in keys])
 | |
|         created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
 | |
|         cnonce = _cnonce()
 | |
|         request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
 | |
|         request_digest  = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
 | |
|         headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
 | |
|                 self.credentials[0],
 | |
|                 self.challenge['realm'],
 | |
|                 self.challenge['snonce'],
 | |
|                 cnonce,
 | |
|                 request_uri,
 | |
|                 created,
 | |
|                 request_digest,
 | |
|                 keylist,
 | |
|                 )
 | |
| 
 | |
|     def response(self, response, content):
 | |
|         challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {})
 | |
|         if challenge.get('reason') in ['integrity', 'stale']:
 | |
|             return True
 | |
|         return False
 | |
| 
 | |
| 
 | |
| class WsseAuthentication(Authentication):
 | |
|     """This is thinly tested and should not be relied upon.
 | |
|     At this time there isn't any third party server to test against.
 | |
|     Blogger and TypePad implemented this algorithm at one point
 | |
|     but Blogger has since switched to Basic over HTTPS and
 | |
|     TypePad has implemented it wrong, by never issuing a 401
 | |
|     challenge but instead requiring your client to telepathically know that
 | |
|     their endpoint is expecting WSSE profile="UsernameToken"."""
 | |
|     def __init__(self, credentials, host, request_uri, headers, response, content, http):
 | |
|         Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
 | |
| 
 | |
|     def request(self, method, request_uri, headers, content):
 | |
|         """Modify the request headers to add the appropriate
 | |
|         Authorization header."""
 | |
|         headers['Authorization'] = 'WSSE profile="UsernameToken"'
 | |
|         iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
 | |
|         cnonce = _cnonce()
 | |
|         password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
 | |
|         headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % (
 | |
|                 self.credentials[0],
 | |
|                 password_digest,
 | |
|                 cnonce,
 | |
|                 iso_now)
 | |
| 
 | |
| class GoogleLoginAuthentication(Authentication):
 | |
|     def __init__(self, credentials, host, request_uri, headers, response, content, http):
 | |
|         from urllib import urlencode
 | |
|         Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
 | |
|         challenge = _parse_www_authenticate(response, 'www-authenticate')
 | |
|         service = challenge['googlelogin'].get('service', 'xapi')
 | |
|         # Bloggger actually returns the service in the challenge
 | |
|         # For the rest we guess based on the URI
 | |
|         if service == 'xapi' and  request_uri.find("calendar") > 0:
 | |
|             service = "cl"
 | |
|         # No point in guessing Base or Spreadsheet
 | |
|         #elif request_uri.find("spreadsheets") > 0:
 | |
|         #    service = "wise"
 | |
| 
 | |
|         auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent'])
 | |
|         resp, content = self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'})
 | |
|         lines = content.split('\n')
 | |
|         d = dict([tuple(line.split("=", 1)) for line in lines if line])
 | |
|         if resp.status == 403:
 | |
|             self.Auth = ""
 | |
|         else:
 | |
|             self.Auth = d['Auth']
 | |
| 
 | |
|     def request(self, method, request_uri, headers, content):
 | |
|         """Modify the request headers to add the appropriate
 | |
|         Authorization header."""
 | |
|         headers['authorization'] = 'GoogleLogin Auth=' + self.Auth
 | |
| 
 | |
| 
 | |
| AUTH_SCHEME_CLASSES = {
 | |
|     "basic": BasicAuthentication,
 | |
|     "wsse": WsseAuthentication,
 | |
|     "digest": DigestAuthentication,
 | |
|     "hmacdigest": HmacDigestAuthentication,
 | |
|     "googlelogin": GoogleLoginAuthentication
 | |
| }
 | |
| 
 | |
| AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
 | |
| 
 | |
| class FileCache(object):
 | |
|     """Uses a local directory as a store for cached files.
 | |
|     Not really safe to use if multiple threads or processes are going to
 | |
|     be running on the same cache.
 | |
|     """
 | |
|     def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
 | |
|         self.cache = cache
 | |
|         self.safe = safe
 | |
|         if not os.path.exists(cache):
 | |
|             os.makedirs(self.cache)
 | |
| 
 | |
|     def get(self, key):
 | |
|         retval = None
 | |
|         cacheFullPath = os.path.join(self.cache, self.safe(key))
 | |
|         try:
 | |
|             f = file(cacheFullPath, "rb")
 | |
|             retval = f.read()
 | |
|             f.close()
 | |
|         except IOError:
 | |
|             pass
 | |
|         return retval
 | |
| 
 | |
|     def set(self, key, value):
 | |
|         cacheFullPath = os.path.join(self.cache, self.safe(key))
 | |
|         f = file(cacheFullPath, "wb")
 | |
|         f.write(value)
 | |
|         f.close()
 | |
| 
 | |
|     def delete(self, key):
 | |
|         cacheFullPath = os.path.join(self.cache, self.safe(key))
 | |
|         if os.path.exists(cacheFullPath):
 | |
|             os.remove(cacheFullPath)
 | |
| 
 | |
| class Credentials(object):
 | |
|     def __init__(self):
 | |
|         self.credentials = []
 | |
| 
 | |
|     def add(self, name, password, domain=""):
 | |
|         self.credentials.append((domain.lower(), name, password))
 | |
| 
 | |
|     def clear(self):
 | |
|         self.credentials = []
 | |
| 
 | |
|     def iter(self, domain):
 | |
|         for (cdomain, name, password) in self.credentials:
 | |
|             if cdomain == "" or domain == cdomain:
 | |
|                 yield (name, password)
 | |
| 
 | |
| class KeyCerts(Credentials):
 | |
|     """Identical to Credentials except that
 | |
|     name/password are mapped to key/cert."""
 | |
|     pass
 | |
| 
 | |
| 
 | |
| class ProxyInfo(object):
 | |
|   """Collect information required to use a proxy."""
 | |
|   def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None):
 | |
|       """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX
 | |
|       constants. For example:
 | |
| 
 | |
| p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000)
 | |
|       """
 | |
|       self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass
 | |
| 
 | |
|   def astuple(self):
 | |
|     return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns,
 | |
|         self.proxy_user, self.proxy_pass)
 | |
| 
 | |
|   def isgood(self):
 | |
|     return (self.proxy_host != None) and (self.proxy_port != None)
 | |
| 
 | |
| 
 | |
| class HTTPConnectionWithTimeout(httplib.HTTPConnection):
 | |
|     """HTTPConnection subclass that supports timeouts"""
 | |
| 
 | |
|     def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None):
 | |
|         httplib.HTTPConnection.__init__(self, host, port, strict)
 | |
|         self.timeout = timeout
 | |
|         self.proxy_info = proxy_info
 | |
| 
 | |
|     def connect(self):
 | |
|         """Connect to the host and port specified in __init__."""
 | |
|         # Mostly verbatim from httplib.py.
 | |
|         msg = "getaddrinfo returns an empty list"
 | |
|         for res in socket.getaddrinfo(self.host, self.port, 0,
 | |
|                 socket.SOCK_STREAM):
 | |
|             af, socktype, proto, canonname, sa = res
 | |
|             try:
 | |
|                 if self.proxy_info and self.proxy_info.isgood():
 | |
|                     self.sock = socks.socksocket(af, socktype, proto)
 | |
|                     self.sock.setproxy(*self.proxy_info.astuple())
 | |
|                 else:
 | |
|                     self.sock = socket.socket(af, socktype, proto)
 | |
|                     self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
 | |
|                 # Different from httplib: support timeouts.
 | |
|                 if has_timeout(self.timeout):
 | |
|                     self.sock.settimeout(self.timeout)
 | |
|                     # End of difference from httplib.
 | |
|                 if self.debuglevel > 0:
 | |
|                     print "connect: (%s, %s)" % (self.host, self.port)
 | |
| 
 | |
|                 self.sock.connect(sa)
 | |
|             except socket.error, msg:
 | |
|                 if self.debuglevel > 0:
 | |
|                     print 'connect fail:', (self.host, self.port)
 | |
|                 if self.sock:
 | |
|                     self.sock.close()
 | |
|                 self.sock = None
 | |
|                 continue
 | |
|             break
 | |
|         if not self.sock:
 | |
|             raise socket.error, msg
 | |
| 
 | |
| class HTTPSConnectionWithTimeout(httplib.HTTPSConnection):
 | |
|     "This class allows communication via SSL."
 | |
| 
 | |
|     def __init__(self, host, port=None, key_file=None, cert_file=None,
 | |
|                  strict=None, timeout=None, proxy_info=None):
 | |
|         httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file,
 | |
|                 cert_file=cert_file, strict=strict)
 | |
|         self.timeout = timeout
 | |
|         self.proxy_info = proxy_info
 | |
| 
 | |
|     def connect(self):
 | |
|         "Connect to a host on a given (SSL) port."
 | |
| 
 | |
|         if self.proxy_info and self.proxy_info.isgood():
 | |
|             sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
 | |
|             sock.setproxy(*self.proxy_info.astuple())
 | |
|         else:
 | |
|             sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 | |
|             sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
 | |
| 
 | |
|         if has_timeout(self.timeout):
 | |
|             sock.settimeout(self.timeout)
 | |
|         sock.connect((self.host, self.port))
 | |
|         self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file)
 | |
| 
 | |
| 
 | |
| 
 | |
| class Http(object):
 | |
|     """An HTTP client that handles:
 | |
| - all methods
 | |
| - caching
 | |
| - ETags
 | |
| - compression,
 | |
| - HTTPS
 | |
| - Basic
 | |
| - Digest
 | |
| - WSSE
 | |
| 
 | |
| and more.
 | |
|     """
 | |
|     def __init__(self, cache=None, timeout=None, proxy_info=None):
 | |
|         """The value of proxy_info is a ProxyInfo instance.
 | |
| 
 | |
| If 'cache' is a string then it is used as a directory name
 | |
| for a disk cache. Otherwise it must be an object that supports
 | |
| the same interface as FileCache."""
 | |
|         self.proxy_info = proxy_info
 | |
|         # Map domain name to an httplib connection
 | |
|         self.connections = {}
 | |
|         # The location of the cache, for now a directory
 | |
|         # where cached responses are held.
 | |
|         if cache and isinstance(cache, str):
 | |
|             self.cache = FileCache(cache)
 | |
|         else:
 | |
|             self.cache = cache
 | |
| 
 | |
|         # Name/password
 | |
|         self.credentials = Credentials()
 | |
| 
 | |
|         # Key/cert
 | |
|         self.certificates = KeyCerts()
 | |
| 
 | |
|         # authorization objects
 | |
|         self.authorizations = []
 | |
| 
 | |
|         # If set to False then no redirects are followed, even safe ones.
 | |
|         self.follow_redirects = True
 | |
| 
 | |
|         # Which HTTP methods do we apply optimistic concurrency to, i.e.
 | |
|         # which methods get an "if-match:" etag header added to them.
 | |
|         self.optimistic_concurrency_methods = ["PUT"]
 | |
| 
 | |
|         # If 'follow_redirects' is True, and this is set to True then
 | |
|         # all redirecs are followed, including unsafe ones.
 | |
|         self.follow_all_redirects = False
 | |
| 
 | |
|         self.ignore_etag = False
 | |
| 
 | |
|         self.force_exception_to_status_code = False
 | |
| 
 | |
|         self.timeout = timeout
 | |
| 
 | |
|     def _auth_from_challenge(self, host, request_uri, headers, response, content):
 | |
|         """A generator that creates Authorization objects
 | |
|            that can be applied to requests.
 | |
|         """
 | |
|         challenges = _parse_www_authenticate(response, 'www-authenticate')
 | |
|         for cred in self.credentials.iter(host):
 | |
|             for scheme in AUTH_SCHEME_ORDER:
 | |
|                 if challenges.has_key(scheme):
 | |
|                     yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
 | |
| 
 | |
|     def add_credentials(self, name, password, domain=""):
 | |
|         """Add a name and password that will be used
 | |
|         any time a request requires authentication."""
 | |
|         self.credentials.add(name, password, domain)
 | |
| 
 | |
|     def add_certificate(self, key, cert, domain):
 | |
|         """Add a key and cert that will be used
 | |
|         any time a request requires authentication."""
 | |
|         self.certificates.add(key, cert, domain)
 | |
| 
 | |
|     def clear_credentials(self):
 | |
|         """Remove all the names and passwords
 | |
|         that are used for authentication"""
 | |
|         self.credentials.clear()
 | |
|         self.authorizations = []
 | |
| 
 | |
|     def _conn_request(self, conn, request_uri, method, body, headers):
 | |
|         for i in range(2):
 | |
|             try:
 | |
|                 conn.request(method, request_uri, body, headers)
 | |
|             except socket.gaierror:
 | |
|                 conn.close()
 | |
|                 raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
 | |
|             except socket.error, e:
 | |
|                 if e.errno == errno.ECONNREFUSED: # Connection refused
 | |
|                     raise
 | |
|             except httplib.HTTPException:
 | |
|                 # Just because the server closed the connection doesn't apparently mean
 | |
|                 # that the server didn't send a response.
 | |
|                 pass
 | |
|             try:
 | |
|                 response = conn.getresponse()
 | |
|             except (socket.error, httplib.HTTPException):
 | |
|                 if i == 0:
 | |
|                     conn.close()
 | |
|                     conn.connect()
 | |
|                     continue
 | |
|                 else:
 | |
|                     raise
 | |
|             else:
 | |
|                 content = ""
 | |
|                 if method == "HEAD":
 | |
|                     response.close()
 | |
|                 else:
 | |
|                     content = response.read()
 | |
|                 response = Response(response)
 | |
|                 if method != "HEAD":
 | |
|                     content = _decompressContent(response, content)
 | |
|             break
 | |
|         return (response, content)
 | |
| 
 | |
| 
 | |
|     def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey):
 | |
|         """Do the actual request using the connection object
 | |
|         and also follow one level of redirects if necessary"""
 | |
| 
 | |
|         auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
 | |
|         auth = auths and sorted(auths)[0][1] or None
 | |
|         if auth:
 | |
|             auth.request(method, request_uri, headers, body)
 | |
| 
 | |
|         (response, content) = self._conn_request(conn, request_uri, method, body, headers)
 | |
| 
 | |
|         if auth:
 | |
|             if auth.response(response, body):
 | |
|                 auth.request(method, request_uri, headers, body)
 | |
|                 (response, content) = self._conn_request(conn, request_uri, method, body, headers )
 | |
|                 response._stale_digest = 1
 | |
| 
 | |
|         if response.status == 401:
 | |
|             for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
 | |
|                 authorization.request(method, request_uri, headers, body)
 | |
|                 (response, content) = self._conn_request(conn, request_uri, method, body, headers, )
 | |
|                 if response.status != 401:
 | |
|                     self.authorizations.append(authorization)
 | |
|                     authorization.response(response, body)
 | |
|                     break
 | |
| 
 | |
|         if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303):
 | |
|             if self.follow_redirects and response.status in [300, 301, 302, 303, 307]:
 | |
|                 # Pick out the location header and basically start from the beginning
 | |
|                 # remembering first to strip the ETag header and decrement our 'depth'
 | |
|                 if redirections:
 | |
|                     if not response.has_key('location') and response.status != 300:
 | |
|                         raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content)
 | |
|                     # Fix-up relative redirects (which violate an RFC 2616 MUST)
 | |
|                     if response.has_key('location'):
 | |
|                         location = response['location']
 | |
|                         (scheme, authority, path, query, fragment) = parse_uri(location)
 | |
|                         if authority == None:
 | |
|                             response['location'] = urlparse.urljoin(absolute_uri, location)
 | |
|                     if response.status == 301 and method in ["GET", "HEAD"]:
 | |
|                         response['-x-permanent-redirect-url'] = response['location']
 | |
|                         if not response.has_key('content-location'):
 | |
|                             response['content-location'] = absolute_uri
 | |
|                         _updateCache(headers, response, content, self.cache, cachekey)
 | |
|                     if headers.has_key('if-none-match'):
 | |
|                         del headers['if-none-match']
 | |
|                     if headers.has_key('if-modified-since'):
 | |
|                         del headers['if-modified-since']
 | |
|                     if response.has_key('location'):
 | |
|                         location = response['location']
 | |
|                         old_response = copy.deepcopy(response)
 | |
|                         if not old_response.has_key('content-location'):
 | |
|                             old_response['content-location'] = absolute_uri
 | |
|                         redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
 | |
|                         (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1)
 | |
|                         response.previous = old_response
 | |
|                 else:
 | |
|                     raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content)
 | |
|             elif response.status in [200, 203] and method == "GET":
 | |
|                 # Don't cache 206's since we aren't going to handle byte range requests
 | |
|                 if not response.has_key('content-location'):
 | |
|                     response['content-location'] = absolute_uri
 | |
|                 _updateCache(headers, response, content, self.cache, cachekey)
 | |
| 
 | |
|         return (response, content)
 | |
| 
 | |
|     def _normalize_headers(self, headers):
 | |
|         return _normalize_headers(headers)
 | |
| 
 | |
| # Need to catch and rebrand some exceptions
 | |
| # Then need to optionally turn all exceptions into status codes
 | |
| # including all socket.* and httplib.* exceptions.
 | |
| 
 | |
| 
 | |
|     def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None):
 | |
|         """ Performs a single HTTP request.
 | |
| The 'uri' is the URI of the HTTP resource and can begin
 | |
| with either 'http' or 'https'. The value of 'uri' must be an absolute URI.
 | |
| 
 | |
| The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc.
 | |
| There is no restriction on the methods allowed.
 | |
| 
 | |
| The 'body' is the entity body to be sent with the request. It is a string
 | |
| object.
 | |
| 
 | |
| Any extra headers that are to be sent with the request should be provided in the
 | |
| 'headers' dictionary.
 | |
| 
 | |
| The maximum number of redirect to follow before raising an
 | |
| exception is 'redirections. The default is 5.
 | |
| 
 | |
| The return value is a tuple of (response, content), the first
 | |
| being and instance of the 'Response' class, the second being
 | |
| a string that contains the response entity body.
 | |
|         """
 | |
|         try:
 | |
|             if headers is None:
 | |
|                 headers = {}
 | |
|             else:
 | |
|                 headers = self._normalize_headers(headers)
 | |
| 
 | |
|             if not headers.has_key('user-agent'):
 | |
|                 headers['user-agent'] = "Python-httplib2/%s" % __version__
 | |
| 
 | |
|             uri = iri2uri(uri)
 | |
| 
 | |
|             (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
 | |
|             domain_port = authority.split(":")[0:2]
 | |
|             if len(domain_port) == 2 and domain_port[1] == '443' and scheme == 'http':
 | |
|                 scheme = 'https'
 | |
|                 authority = domain_port[0]
 | |
| 
 | |
|             conn_key = scheme+":"+authority
 | |
|             if conn_key in self.connections:
 | |
|                 conn = self.connections[conn_key]
 | |
|             else:
 | |
|                 if not connection_type:
 | |
|                     connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout
 | |
|                 certs = list(self.certificates.iter(authority))
 | |
|                 if scheme == 'https' and certs:
 | |
|                     conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0],
 | |
|                         cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info)
 | |
|                 else:
 | |
|                     conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info)
 | |
|                 conn.set_debuglevel(debuglevel)
 | |
| 
 | |
|             if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers:
 | |
|                 headers['accept-encoding'] = 'gzip, deflate'
 | |
| 
 | |
|             info = email.Message.Message()
 | |
|             cached_value = None
 | |
|             if self.cache:
 | |
|                 cachekey = defrag_uri
 | |
|                 cached_value = self.cache.get(cachekey)
 | |
|                 if cached_value:
 | |
|                     # info = email.message_from_string(cached_value)
 | |
|                     #
 | |
|                     # Need to replace the line above with the kludge below
 | |
|                     # to fix the non-existent bug not fixed in this
 | |
|                     # bug report: http://mail.python.org/pipermail/python-bugs-list/2005-September/030289.html
 | |
|                     try:
 | |
|                         info, content = cached_value.split('\r\n\r\n', 1)
 | |
|                         feedparser = email.FeedParser.FeedParser()
 | |
|                         feedparser.feed(info)
 | |
|                         info = feedparser.close()
 | |
|                         feedparser._parse = None
 | |
|                     except IndexError:
 | |
|                         self.cache.delete(cachekey)
 | |
|                         cachekey = None
 | |
|                         cached_value = None
 | |
|             else:
 | |
|                 cachekey = None
 | |
| 
 | |
|             if method in self.optimistic_concurrency_methods and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
 | |
|                 # http://www.w3.org/1999/04/Editing/
 | |
|                 headers['if-match'] = info['etag']
 | |
| 
 | |
|             if method not in ["GET", "HEAD"] and self.cache and cachekey:
 | |
|                 # RFC 2616 Section 13.10
 | |
|                 self.cache.delete(cachekey)
 | |
| 
 | |
|             # Check the vary header in the cache to see if this request
 | |
|             # matches what varies in the cache.
 | |
|             if method in ['GET', 'HEAD'] and 'vary' in info:
 | |
|                 vary = info['vary']
 | |
|                 vary_headers = vary.lower().replace(' ', '').split(',')
 | |
|                 for header in vary_headers:
 | |
|                     key = '-varied-%s' % header
 | |
|                     value = info[key]
 | |
|                     if headers.get(header, None) != value:
 | |
|                             cached_value = None
 | |
|                             break
 | |
| 
 | |
|             if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers:
 | |
|                 if info.has_key('-x-permanent-redirect-url'):
 | |
|                     # Should cached permanent redirects be counted in our redirection count? For now, yes.
 | |
|                     (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1)
 | |
|                     response.previous = Response(info)
 | |
|                     response.previous.fromcache = True
 | |
|                 else:
 | |
|                     # Determine our course of action:
 | |
|                     #   Is the cached entry fresh or stale?
 | |
|                     #   Has the client requested a non-cached response?
 | |
|                     #
 | |
|                     # There seems to be three possible answers:
 | |
|                     # 1. [FRESH] Return the cache entry w/o doing a GET
 | |
|                     # 2. [STALE] Do the GET (but add in cache validators if available)
 | |
|                     # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
 | |
|                     entry_disposition = _entry_disposition(info, headers)
 | |
| 
 | |
|                     if entry_disposition == "FRESH":
 | |
|                         if not cached_value:
 | |
|                             info['status'] = '504'
 | |
|                             content = ""
 | |
|                         response = Response(info)
 | |
|                         if cached_value:
 | |
|                             response.fromcache = True
 | |
|                         return (response, content)
 | |
| 
 | |
|                     if entry_disposition == "STALE":
 | |
|                         if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
 | |
|                             headers['if-none-match'] = info['etag']
 | |
|                         if info.has_key('last-modified') and not 'last-modified' in headers:
 | |
|                             headers['if-modified-since'] = info['last-modified']
 | |
|                     elif entry_disposition == "TRANSPARENT":
 | |
|                         pass
 | |
| 
 | |
|                     (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
 | |
| 
 | |
|                 if response.status == 304 and method == "GET":
 | |
|                     # Rewrite the cache entry with the new end-to-end headers
 | |
|                     # Take all headers that are in response
 | |
|                     # and overwrite their values in info.
 | |
|                     # unless they are hop-by-hop, or are listed in the connection header.
 | |
| 
 | |
|                     for key in _get_end2end_headers(response):
 | |
|                         info[key] = response[key]
 | |
|                     merged_response = Response(info)
 | |
|                     if hasattr(response, "_stale_digest"):
 | |
|                         merged_response._stale_digest = response._stale_digest
 | |
|                     _updateCache(headers, merged_response, content, self.cache, cachekey)
 | |
|                     response = merged_response
 | |
|                     response.status = 200
 | |
|                     response.fromcache = True
 | |
| 
 | |
|                 elif response.status == 200:
 | |
|                     content = new_content
 | |
|                 else:
 | |
|                     self.cache.delete(cachekey)
 | |
|                     content = new_content
 | |
|             else:
 | |
|                 cc = _parse_cache_control(headers)
 | |
|                 if cc.has_key('only-if-cached'):
 | |
|                     info['status'] = '504'
 | |
|                     response = Response(info)
 | |
|                     content = ""
 | |
|                 else:
 | |
|                     (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
 | |
|         except Exception, e:
 | |
|             if self.force_exception_to_status_code:
 | |
|                 if isinstance(e, HttpLib2ErrorWithResponse):
 | |
|                     response = e.response
 | |
|                     content = e.content
 | |
|                     response.status = 500
 | |
|                     response.reason = str(e)
 | |
|                 elif isinstance(e, socket.timeout):
 | |
|                     content = "Request Timeout"
 | |
|                     response = Response( {
 | |
|                             "content-type": "text/plain",
 | |
|                             "status": "408",
 | |
|                             "content-length": len(content)
 | |
|                             })
 | |
|                     response.reason = "Request Timeout"
 | |
|                 else:
 | |
|                     content = str(e)
 | |
|                     response = Response( {
 | |
|                             "content-type": "text/plain",
 | |
|                             "status": "400",
 | |
|                             "content-length": len(content)
 | |
|                             })
 | |
|                     response.reason = "Bad Request"
 | |
|             else:
 | |
|                 raise
 | |
| 
 | |
| 
 | |
|         return (response, content)
 | |
| 
 | |
| 
 | |
| 
 | |
| class Response(dict):
 | |
|     """An object more like email.Message than httplib.HTTPResponse."""
 | |
| 
 | |
|     """Is this response from our local cache"""
 | |
|     fromcache = False
 | |
| 
 | |
|     """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """
 | |
|     version = 11
 | |
| 
 | |
|     "Status code returned by server. "
 | |
|     status = 200
 | |
| 
 | |
|     """Reason phrase returned by server."""
 | |
|     reason = "Ok"
 | |
| 
 | |
|     previous = None
 | |
| 
 | |
|     def __init__(self, info):
 | |
|         # info is either an email.Message or
 | |
|         # an httplib.HTTPResponse object.
 | |
|         if isinstance(info, httplib.HTTPResponse):
 | |
|             for key, value in info.getheaders():
 | |
|                 self[key.lower()] = value
 | |
|             self.status = info.status
 | |
|             self['status'] = str(self.status)
 | |
|             self.reason = info.reason
 | |
|             self.version = info.version
 | |
|         elif isinstance(info, email.Message.Message):
 | |
|             for key, value in info.items():
 | |
|                 self[key] = value
 | |
|             self.status = int(self['status'])
 | |
|         else:
 | |
|             for key, value in info.iteritems():
 | |
|                 self[key] = value
 | |
|             self.status = int(self.get('status', self.status))
 | |
| 
 | |
| 
 | |
|     def __getattr__(self, name):
 | |
|         if name == 'dict':
 | |
|             return self
 | |
|         else:
 | |
|             raise AttributeError, name
 | 
