Files
deb-python-eventlet/eventlet/green/urllib.py

651 lines
24 KiB
Python

urllib = __import__('urllib')
for var in dir(urllib):
exec "%s = urllib.%s" % (var, var)
# import the following to be a better drop-in replacement
__import_lst = ['__all__', '__version__', 'MAXFTPCACHE', 'ContentTooShortError',
'ftpcache', '_noheaders', 'noheaders', 'addbase', 'addclosehook',
'addinfo', 'addinfourl', '_is_unicode', 'toBytes', '_hextochr',
'always_safe', 'getproxies_environment', 'proxy_bypass']
for var in __import_lst:
exec "%s = urllib.%s" % (var, var)
from eventlet.green import socket
import os
from eventlet.green import time
import sys
from urlparse import urljoin as basejoin
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
"""urlopen(url [, data]) -> open file-like object"""
global _urlopener
if proxies is not None:
opener = FancyURLopener(proxies=proxies)
elif not _urlopener:
opener = FancyURLopener()
_urlopener = opener
else:
opener = _urlopener
if data is None:
return opener.open(url)
else:
return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
global _urlopener
if not _urlopener:
_urlopener = FancyURLopener()
return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
if _urlopener:
_urlopener.cleanup()
class URLopener(urllib.URLopener):
def open_http(self, url, data=None):
"""Use HTTP protocol."""
from eventlet.green import httplib
user_passwd = None
proxy_passwd= None
if isinstance(url, str):
host, selector = splithost(url)
if host:
user_passwd, host = splituser(host)
host = unquote(host)
realhost = host
else:
host, selector = url
# check whether the proxy contains authorization information
proxy_passwd, host = splituser(host)
# now we proceed with the url we want to obtain
urltype, rest = splittype(selector)
url = rest
user_passwd = None
if urltype.lower() != 'http':
realhost = None
else:
realhost, rest = splithost(rest)
if realhost:
user_passwd, realhost = splituser(realhost)
if user_passwd:
selector = "%s://%s%s" % (urltype, realhost, rest)
if proxy_bypass(realhost):
host = realhost
#print "proxy via http:", host, selector
if not host: raise IOError, ('http error', 'no host given')
if proxy_passwd:
import base64
proxy_auth = base64.b64encode(proxy_passwd).strip()
else:
proxy_auth = None
if user_passwd:
import base64
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
h = httplib.HTTP(host)
if data is not None:
h.putrequest('POST', selector)
h.putheader('Content-Type', 'application/x-www-form-urlencoded')
h.putheader('Content-Length', '%d' % len(data))
else:
h.putrequest('GET', selector)
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
h.endheaders()
if data is not None:
h.send(data)
errcode, errmsg, headers = h.getreply()
if errcode == -1:
# something went wrong with the HTTP status line
raise IOError, ('http protocol error', 0,
'got a bad status line', None)
fp = h.getfile()
if errcode == 200:
return addinfourl(fp, headers, "http:" + url)
else:
if data is None:
return self.http_error(url, fp, errcode, errmsg, headers)
else:
return self.http_error(url, fp, errcode, errmsg, headers, data)
if hasattr(socket, "ssl"):
def open_https(self, url, data=None):
"""Use HTTPS protocol."""
from eventlet.green import httplib
user_passwd = None
proxy_passwd = None
if isinstance(url, str):
host, selector = splithost(url)
if host:
user_passwd, host = splituser(host)
host = unquote(host)
realhost = host
else:
host, selector = url
# here, we determine, whether the proxy contains authorization information
proxy_passwd, host = splituser(host)
urltype, rest = splittype(selector)
url = rest
user_passwd = None
if urltype.lower() != 'https':
realhost = None
else:
realhost, rest = splithost(rest)
if realhost:
user_passwd, realhost = splituser(realhost)
if user_passwd:
selector = "%s://%s%s" % (urltype, realhost, rest)
#print "proxy via https:", host, selector
if not host: raise IOError, ('https error', 'no host given')
if proxy_passwd:
import base64
proxy_auth = base64.b64encode(proxy_passwd).strip()
else:
proxy_auth = None
if user_passwd:
import base64
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
h = httplib.HTTPS(host, 0,
key_file=self.key_file,
cert_file=self.cert_file)
if data is not None:
h.putrequest('POST', selector)
h.putheader('Content-Type',
'application/x-www-form-urlencoded')
h.putheader('Content-Length', '%d' % len(data))
else:
h.putrequest('GET', selector)
if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
h.endheaders()
if data is not None:
h.send(data)
errcode, errmsg, headers = h.getreply()
if errcode == -1:
# something went wrong with the HTTP status line
raise IOError, ('http protocol error', 0,
'got a bad status line', None)
fp = h.getfile()
if errcode == 200:
return addinfourl(fp, headers, "https:" + url)
else:
if data is None:
return self.http_error(url, fp, errcode, errmsg, headers)
else:
return self.http_error(url, fp, errcode, errmsg, headers,
data)
def open_gopher(self, url):
"""Use Gopher protocol."""
if not isinstance(url, str):
raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
from eventlet.green import gopherlib
host, selector = splithost(url)
if not host: raise IOError, ('gopher error', 'no host given')
host = unquote(host)
type, selector = splitgophertype(selector)
selector, query = splitquery(selector)
selector = unquote(selector)
if query:
query = unquote(query)
fp = gopherlib.send_query(selector, query, host)
else:
fp = gopherlib.send_selector(selector, host)
return addinfourl(fp, noheaders(), "gopher:" + url)
def open_local_file(self, url):
"""Use local file."""
import mimetypes, mimetools, email.Utils
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
host, file = splithost(url)
localname = url2pathname(file)
try:
stats = os.stat(localname)
except OSError, e:
raise IOError(e.errno, e.strerror, e.filename)
size = stats.st_size
modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
mtype = mimetypes.guess_type(url)[0]
headers = mimetools.Message(StringIO(
'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified)))
if not host:
urlfile = file
if file[:1] == '/':
urlfile = 'file://' + file
return addinfourl(open(localname, 'rb'),
headers, urlfile)
host, port = splitport(host)
if not port \
and socket.gethostbyname(host) in (localhost(), thishost()):
urlfile = file
if file[:1] == '/':
urlfile = 'file://' + file
return addinfourl(open(localname, 'rb'),
headers, urlfile)
raise IOError, ('local file error', 'not on local host')
def open_ftp(self, url):
"""Use FTP protocol."""
if not isinstance(url, str):
raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
import mimetypes, mimetools
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
host, path = splithost(url)
if not host: raise IOError, ('ftp error', 'no host given')
host, port = splitport(host)
user, host = splituser(host)
if user: user, passwd = splitpasswd(user)
else: passwd = None
host = unquote(host)
user = unquote(user or '')
passwd = unquote(passwd or '')
host = socket.gethostbyname(host)
if not port:
from eventlet.green import ftplib
port = ftplib.FTP_PORT
else:
port = int(port)
path, attrs = splitattr(path)
path = unquote(path)
dirs = path.split('/')
dirs, file = dirs[:-1], dirs[-1]
if dirs and not dirs[0]: dirs = dirs[1:]
if dirs and not dirs[0]: dirs[0] = '/'
key = user, host, port, '/'.join(dirs)
# XXX thread unsafe!
if len(self.ftpcache) > MAXFTPCACHE:
# Prune the cache, rather arbitrarily
for k in self.ftpcache.keys():
if k != key:
v = self.ftpcache[k]
del self.ftpcache[k]
v.close()
try:
if not key in self.ftpcache:
self.ftpcache[key] = \
ftpwrapper(user, passwd, host, port, dirs)
if not file: type = 'D'
else: type = 'I'
for attr in attrs:
attr, value = splitvalue(attr)
if attr.lower() == 'type' and \
value in ('a', 'A', 'i', 'I', 'd', 'D'):
type = value.upper()
(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
mtype = mimetypes.guess_type("ftp:" + url)[0]
headers = ""
if mtype:
headers += "Content-Type: %s\n" % mtype
if retrlen is not None and retrlen >= 0:
headers += "Content-Length: %d\n" % retrlen
headers = mimetools.Message(StringIO(headers))
return addinfourl(fp, headers, "ftp:" + url)
except ftperrors(), msg:
raise IOError, ('ftp error', msg), sys.exc_info()[2]
# this one is copied verbatim
class FancyURLopener(URLopener):
"""Derived class with handlers for errors we can handle (perhaps)."""
def __init__(self, *args, **kwargs):
URLopener.__init__(self, *args, **kwargs)
self.auth_cache = {}
self.tries = 0
self.maxtries = 10
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""Default error handling -- don't raise an exception."""
return addinfourl(fp, headers, "http:" + url)
def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 302 -- relocated (temporarily)."""
self.tries += 1
if self.maxtries and self.tries >= self.maxtries:
if hasattr(self, "http_error_500"):
meth = self.http_error_500
else:
meth = self.http_error_default
self.tries = 0
return meth(url, fp, 500,
"Internal Server Error: Redirect Recursion", headers)
result = self.redirect_internal(url, fp, errcode, errmsg, headers,
data)
self.tries = 0
return result
def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
if 'location' in headers:
newurl = headers['location']
elif 'uri' in headers:
newurl = headers['uri']
else:
return
void = fp.read()
fp.close()
# In case the server sent a relative URL, join with original:
newurl = basejoin(self.type + ":" + url, newurl)
return self.open(newurl)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 301 -- also relocated (permanently)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 303 -- also relocated (essentially identical to 302)."""
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 307 -- relocated, but turn POST into error."""
if data is None:
return self.http_error_302(url, fp, errcode, errmsg, headers, data)
else:
return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 401 -- authentication required.
This function supports Basic authentication only."""
if not 'www-authenticate' in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['www-authenticate']
import re
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
if not match:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
scheme, realm = match.groups()
if scheme.lower() != 'basic':
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
name = 'retry_' + self.type + '_basic_auth'
if data is None:
return getattr(self,name)(url, realm)
else:
return getattr(self,name)(url, realm, data)
def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
"""Error 407 -- proxy authentication required.
This function supports Basic authentication only."""
if not 'proxy-authenticate' in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['proxy-authenticate']
import re
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
if not match:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
scheme, realm = match.groups()
if scheme.lower() != 'basic':
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
name = 'retry_proxy_' + self.type + '_basic_auth'
if data is None:
return getattr(self,name)(url, realm)
else:
return getattr(self,name)(url, realm, data)
def retry_proxy_http_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
newurl = 'http://' + host + selector
proxy = self.proxies['http']
urltype, proxyhost = splittype(proxy)
proxyhost, proxyselector = splithost(proxyhost)
i = proxyhost.find('@') + 1
proxyhost = proxyhost[i:]
user, passwd = self.get_user_passwd(proxyhost, realm, i)
if not (user or passwd): return None
proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
self.proxies['http'] = 'http://' + proxyhost + proxyselector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def retry_proxy_https_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
newurl = 'https://' + host + selector
proxy = self.proxies['https']
urltype, proxyhost = splittype(proxy)
proxyhost, proxyselector = splithost(proxyhost)
i = proxyhost.find('@') + 1
proxyhost = proxyhost[i:]
user, passwd = self.get_user_passwd(proxyhost, realm, i)
if not (user or passwd): return None
proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
self.proxies['https'] = 'https://' + proxyhost + proxyselector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def retry_http_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
i = host.find('@') + 1
host = host[i:]
user, passwd = self.get_user_passwd(host, realm, i)
if not (user or passwd): return None
host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
newurl = 'http://' + host + selector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def retry_https_basic_auth(self, url, realm, data=None):
host, selector = splithost(url)
i = host.find('@') + 1
host = host[i:]
user, passwd = self.get_user_passwd(host, realm, i)
if not (user or passwd): return None
host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
newurl = 'https://' + host + selector
if data is None:
return self.open(newurl)
else:
return self.open(newurl, data)
def get_user_passwd(self, host, realm, clear_cache = 0):
key = realm + '@' + host.lower()
if key in self.auth_cache:
if clear_cache:
del self.auth_cache[key]
else:
return self.auth_cache[key]
user, passwd = self.prompt_user_passwd(host, realm)
if user or passwd: self.auth_cache[key] = (user, passwd)
return user, passwd
def prompt_user_passwd(self, host, realm):
"""Override this in a GUI environment!"""
import getpass
try:
user = raw_input("Enter username for %s at %s: " % (realm,
host))
passwd = getpass.getpass("Enter password for %s in %s at %s: " %
(user, realm, host))
return user, passwd
except KeyboardInterrupt:
print
return None, None
# Utility functions
_localhost = None
def localhost():
"""Return the IP address of the magic hostname 'localhost'."""
global _localhost
if _localhost is None:
_localhost = socket.gethostbyname('localhost')
return _localhost
_thishost = None
def thishost():
"""Return the IP address of the current host."""
global _thishost
if _thishost is None:
_thishost = socket.gethostbyname(socket.gethostname())
return _thishost
_ftperrors = None
def ftperrors():
"""Return the set of errors raised by the FTP class."""
global _ftperrors
if _ftperrors is None:
from eventlet.green import ftplib
_ftperrors = ftplib.all_errors
return _ftperrors
# Utility classes
class ftpwrapper(urllib.ftpwrapper):
"""Class used by open_ftp() for cache of open FTP connections."""
def init(self):
from eventlet.green import ftplib
self.busy = 0
self.ftp = ftplib.FTP()
self.ftp.connect(self.host, self.port)
self.ftp.login(self.user, self.passwd)
for dir in self.dirs:
self.ftp.cwd(dir)
def retrfile(self, file, type):
from eventlet.green import ftplib
self.endtransfer()
if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
else: cmd = 'TYPE ' + type; isdir = 0
try:
self.ftp.voidcmd(cmd)
except ftplib.all_errors:
self.init()
self.ftp.voidcmd(cmd)
conn = None
if file and not isdir:
# Try to retrieve as a file
try:
cmd = 'RETR ' + file
conn = self.ftp.ntransfercmd(cmd)
except ftplib.error_perm, reason:
if str(reason)[:3] != '550':
raise IOError, ('ftp error', reason), sys.exc_info()[2]
if not conn:
# Set transfer mode to ASCII!
self.ftp.voidcmd('TYPE A')
# Try a directory listing
if file: cmd = 'LIST ' + file
else: cmd = 'LIST'
conn = self.ftp.ntransfercmd(cmd)
self.busy = 1
# Pass back both a suitably decorated object and a retrieval length
return (addclosehook(conn[0].makefile('rb'),
self.endtransfer), conn[1])
# Test and time quote() and unquote()
def test1():
s = ''
for i in range(256): s = s + chr(i)
s = s*4
t0 = time.time()
qs = quote(s)
uqs = unquote(qs)
t1 = time.time()
if uqs != s:
print 'Wrong!'
print repr(s)
print repr(qs)
print repr(uqs)
print round(t1 - t0, 3), 'sec'
def reporthook(blocknum, blocksize, totalsize):
# Report during remote transfers
print "Block number: %d, Block size: %d, Total size: %d" % (
blocknum, blocksize, totalsize)
# Test program
def test(args=[]):
if not args:
args = [
'/etc/passwd',
'file:/etc/passwd',
'file://localhost/etc/passwd',
'ftp://ftp.gnu.org/pub/README',
## 'gopher://gopher.micro.umn.edu/1/',
'http://www.python.org/index.html',
]
if hasattr(URLopener, "open_https"):
args.append('https://synergy.as.cmu.edu/~geek/')
try:
for url in args:
print '-'*10, url, '-'*10
fn, h = urlretrieve(url, None, reporthook)
print fn
if h:
print '======'
for k in h.keys(): print k + ':', h[k]
print '======'
fp = open(fn, 'rb')
data = fp.read()
del fp
if '\r' in data:
table = string.maketrans("", "")
data = data.translate(table, "\r")
print data
fn, h = None, None
print '-'*40
finally:
urlcleanup()
def main():
import getopt, sys
try:
opts, args = getopt.getopt(sys.argv[1:], "th")
except getopt.error, msg:
print msg
print "Use -h for help"
return
t = 0
for o, a in opts:
if o == '-t':
t = t + 1
if o == '-h':
print "Usage: python urllib.py [-t] [url ...]"
print "-t runs self-test;",
print "otherwise, contents of urls are printed"
return
if t:
if t > 1:
test1()
test(args)
else:
if not args:
print "Use -h for help"
for url in args:
print urlopen(url).read(),
# Run test program when run as a script
if __name__ == '__main__':
main()