From 1b6fe06cb95b2f6f7d97f317769d8a0c171478f8 Mon Sep 17 00:00:00 2001 From: Ryan Williams Date: Sun, 29 Nov 2009 01:44:36 -0500 Subject: [PATCH] Replaced copy-and-paste code of urllib with short patcher snippet, adding a hacky hack in the process, but one I don't feel too bad about because it's freakin urllib which no one actually uses (or admits to using). --- eventlet/green/ftplib.py | 13 + eventlet/green/urllib.py | 659 ++---------------------------------- eventlet/patcher.py | 19 +- tests/stdlib/test_ftplib.py | 13 + tests/stdlib/test_urllib.py | 11 + 5 files changed, 74 insertions(+), 641 deletions(-) create mode 100644 eventlet/green/ftplib.py create mode 100644 tests/stdlib/test_ftplib.py create mode 100644 tests/stdlib/test_urllib.py diff --git a/eventlet/green/ftplib.py b/eventlet/green/ftplib.py new file mode 100644 index 0000000..b452e1d --- /dev/null +++ b/eventlet/green/ftplib.py @@ -0,0 +1,13 @@ +from eventlet import patcher + +# *NOTE: there might be some funny business with the "SOCKS" module +# if it even still exists +from eventlet.green import socket + +patcher.inject('ftplib', globals(), ('socket', socket)) + +del patcher + +# Run test program when run as a script +if __name__ == '__main__': + test() diff --git a/eventlet/green/urllib.py b/eventlet/green/urllib.py index 207c57c..fe5c02d 100644 --- a/eventlet/green/urllib.py +++ b/eventlet/green/urllib.py @@ -1,649 +1,28 @@ -urllib = __import__('urllib') -for var in dir(urllib): - exec "%s = urllib.%s" % (var, var) - -# import the following to be a better drop-in replacement -__import_lst = ['__all__', '__version__', 'MAXFTPCACHE', 'ContentTooShortError', - 'ftpcache', '_noheaders', 'noheaders', 'addbase', 'addclosehook', - 'addinfo', 'addinfourl', '_is_unicode', 'toBytes', '_hextochr', - 'always_safe', 'getproxies_environment', 'proxy_bypass'] - -for var in __import_lst: - exec "%s = urllib.%s" % (var, var) - +from eventlet import patcher from eventlet.green import socket -import os from eventlet.green import time -import sys -from urlparse import urljoin as basejoin +from eventlet.green import httplib +from eventlet.green import ftplib -# Shortcut for basic usage -_urlopener = None -def urlopen(url, data=None, proxies=None): - """urlopen(url [, data]) -> open file-like object""" - global _urlopener - if proxies is not None: - opener = FancyURLopener(proxies=proxies) - elif not _urlopener: - opener = FancyURLopener() - _urlopener = opener - else: - opener = _urlopener - if data is None: - return opener.open(url) - else: - return opener.open(url, data) -def urlretrieve(url, filename=None, reporthook=None, data=None): - global _urlopener - if not _urlopener: - _urlopener = FancyURLopener() - return _urlopener.retrieve(url, filename, reporthook, data) -def urlcleanup(): - if _urlopener: - _urlopener.cleanup() +to_patch = [('socket', socket), ('httplib', httplib), + ('time', time), ('ftplib', ftplib)] +try: + from eventlet.green import ssl + to_patch.append(('ssl', ssl)) +except ImportError: + pass + +patcher.inject('urllib', globals(), *to_patch) -class URLopener(urllib.URLopener): +URLopener.open_http = patcher.patch_function(URLopener.open_http, ('httplib', httplib)) +if hasattr(URLopener, 'open_https'): + URLopener.open_https = patcher.patch_function(URLopener.open_https, ('httplib', httplib)) - def open_http(self, url, data=None): - """Use HTTP protocol.""" - from eventlet.green import httplib - user_passwd = None - proxy_passwd= None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # check whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - # now we proceed with the url we want to obtain - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'http': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - if proxy_bypass(realhost): - host = realhost +URLopener.open_ftp = patcher.patch_function(URLopener.open_ftp, ('ftplib', ftplib)) +ftpwrapper.init = patcher.patch_function(ftpwrapper.init, ('ftplib', ftplib)) +ftpwrapper.retrfile = patcher.patch_function(ftpwrapper.retrfile, ('ftplib', ftplib)) - #print "proxy via http:", host, selector - if not host: raise IOError, ('http error', 'no host given') - - if proxy_passwd: - import base64 - proxy_auth = base64.b64encode(proxy_passwd).strip() - else: - proxy_auth = None - - if user_passwd: - import base64 - auth = base64.b64encode(user_passwd).strip() - else: - auth = None - h = httplib.HTTP(host) - if data is not None: - h.putrequest('POST', selector) - h.putheader('Content-Type', 'application/x-www-form-urlencoded') - h.putheader('Content-Length', '%d' % len(data)) - else: - h.putrequest('GET', selector) - if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) - if auth: h.putheader('Authorization', 'Basic %s' % auth) - if realhost: h.putheader('Host', realhost) - for args in self.addheaders: h.putheader(*args) - h.endheaders() - if data is not None: - h.send(data) - errcode, errmsg, headers = h.getreply() - if errcode == -1: - # something went wrong with the HTTP status line - raise IOError, ('http protocol error', 0, - 'got a bad status line', None) - fp = h.getfile() - if errcode == 200: - return addinfourl(fp, headers, "http:" + url) - else: - if data is None: - return self.http_error(url, fp, errcode, errmsg, headers) - else: - return self.http_error(url, fp, errcode, errmsg, headers, data) - - if hasattr(socket, "ssl"): - def open_https(self, url, data=None): - """Use HTTPS protocol.""" - from eventlet.green import httplib - user_passwd = None - proxy_passwd = None - if isinstance(url, str): - host, selector = splithost(url) - if host: - user_passwd, host = splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # here, we determine, whether the proxy contains authorization information - proxy_passwd, host = splituser(host) - urltype, rest = splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'https': - realhost = None - else: - realhost, rest = splithost(rest) - if realhost: - user_passwd, realhost = splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - #print "proxy via https:", host, selector - if not host: raise IOError, ('https error', 'no host given') - if proxy_passwd: - import base64 - proxy_auth = base64.b64encode(proxy_passwd).strip() - else: - proxy_auth = None - if user_passwd: - import base64 - auth = base64.b64encode(user_passwd).strip() - else: - auth = None - h = httplib.HTTPS(host, 0, - key_file=self.key_file, - cert_file=self.cert_file) - if data is not None: - h.putrequest('POST', selector) - h.putheader('Content-Type', - 'application/x-www-form-urlencoded') - h.putheader('Content-Length', '%d' % len(data)) - else: - h.putrequest('GET', selector) - if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth) - if auth: h.putheader('Authorization', 'Basic %s' % auth) - if realhost: h.putheader('Host', realhost) - for args in self.addheaders: h.putheader(*args) - h.endheaders() - if data is not None: - h.send(data) - errcode, errmsg, headers = h.getreply() - if errcode == -1: - # something went wrong with the HTTP status line - raise IOError, ('http protocol error', 0, - 'got a bad status line', None) - fp = h.getfile() - if errcode == 200: - return addinfourl(fp, headers, "https:" + url) - else: - if data is None: - return self.http_error(url, fp, errcode, errmsg, headers) - else: - return self.http_error(url, fp, errcode, errmsg, headers, - data) - - def open_gopher(self, url): - """Use Gopher protocol.""" - if not isinstance(url, str): - raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented') - from eventlet.green import gopherlib - host, selector = splithost(url) - if not host: raise IOError, ('gopher error', 'no host given') - host = unquote(host) - type, selector = splitgophertype(selector) - selector, query = splitquery(selector) - selector = unquote(selector) - if query: - query = unquote(query) - fp = gopherlib.send_query(selector, query, host) - else: - fp = gopherlib.send_selector(selector, host) - return addinfourl(fp, noheaders(), "gopher:" + url) - - def open_local_file(self, url): - """Use local file.""" - import mimetypes, mimetools, email.Utils - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - host, file = splithost(url) - localname = url2pathname(file) - try: - stats = os.stat(localname) - except OSError, e: - raise IOError(e.errno, e.strerror, e.filename) - size = stats.st_size - modified = email.Utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(url)[0] - headers = mimetools.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified))) - if not host: - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), - headers, urlfile) - host, port = splitport(host) - if not port \ - and socket.gethostbyname(host) in (localhost(), thishost()): - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), - headers, urlfile) - raise IOError, ('local file error', 'not on local host') - - def open_ftp(self, url): - """Use FTP protocol.""" - if not isinstance(url, str): - raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented') - import mimetypes, mimetools - try: - from cStringIO import StringIO - except ImportError: - from StringIO import StringIO - host, path = splithost(url) - if not host: raise IOError, ('ftp error', 'no host given') - host, port = splitport(host) - user, host = splituser(host) - if user: user, passwd = splitpasswd(user) - else: passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') - host = socket.gethostbyname(host) - if not port: - from eventlet.green import ftplib - port = ftplib.FTP_PORT - else: - port = int(port) - path, attrs = splitattr(path) - path = unquote(path) - dirs = path.split('/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: dirs = dirs[1:] - if dirs and not dirs[0]: dirs[0] = '/' - key = user, host, port, '/'.join(dirs) - # XXX thread unsafe! - if len(self.ftpcache) > MAXFTPCACHE: - # Prune the cache, rather arbitrarily - for k in self.ftpcache.keys(): - if k != key: - v = self.ftpcache[k] - del self.ftpcache[k] - v.close() - try: - if not key in self.ftpcache: - self.ftpcache[key] = \ - ftpwrapper(user, passwd, host, port, dirs) - if not file: type = 'D' - else: type = 'I' - for attr in attrs: - attr, value = splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - (fp, retrlen) = self.ftpcache[key].retrfile(file, type) - mtype = mimetypes.guess_type("ftp:" + url)[0] - headers = "" - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - headers = mimetools.Message(StringIO(headers)) - return addinfourl(fp, headers, "ftp:" + url) - except ftperrors(), msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] - -# this one is copied verbatim -class FancyURLopener(URLopener): - """Derived class with handlers for errors we can handle (perhaps).""" - - def __init__(self, *args, **kwargs): - URLopener.__init__(self, *args, **kwargs) - self.auth_cache = {} - self.tries = 0 - self.maxtries = 10 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handling -- don't raise an exception.""" - return addinfourl(fp, headers, "http:" + url) - - def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): - """Error 302 -- relocated (temporarily).""" - self.tries += 1 - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default - self.tries = 0 - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", headers) - result = self.redirect_internal(url, fp, errcode, errmsg, headers, - data) - self.tries = 0 - return result - - def redirect_internal(self, url, fp, errcode, errmsg, headers, data): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - return - void = fp.read() - fp.close() - # In case the server sent a relative URL, join with original: - newurl = basejoin(self.type + ":" + url, newurl) - return self.open(newurl) - - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - """Error 301 -- also relocated (permanently).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): - """Error 303 -- also relocated (essentially identical to 302).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): - """Error 307 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None): - """Error 401 -- authentication required. - This function supports Basic authentication only.""" - if not 'www-authenticate' in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['www-authenticate'] - import re - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - name = 'retry_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def http_error_407(self, url, fp, errcode, errmsg, headers, data=None): - """Error 407 -- proxy authentication required. - This function supports Basic authentication only.""" - if not 'proxy-authenticate' in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['proxy-authenticate'] - import re - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - name = 'retry_proxy_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'http://' + host + selector - proxy = self.proxies['http'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost - self.proxies['http'] = 'http://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - newurl = 'https://' + host + selector - proxy = self.proxies['https'] - urltype, proxyhost = splittype(proxy) - proxyhost, proxyselector = splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost - self.proxies['https'] = 'https://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_http_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = 'http://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_https_basic_auth(self, url, realm, data=None): - host, selector = splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host - newurl = 'https://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def get_user_passwd(self, host, realm, clear_cache = 0): - key = realm + '@' + host.lower() - if key in self.auth_cache: - if clear_cache: - del self.auth_cache[key] - else: - return self.auth_cache[key] - user, passwd = self.prompt_user_passwd(host, realm) - if user or passwd: self.auth_cache[key] = (user, passwd) - return user, passwd - - def prompt_user_passwd(self, host, realm): - """Override this in a GUI environment!""" - import getpass - try: - user = raw_input("Enter username for %s at %s: " % (realm, - host)) - passwd = getpass.getpass("Enter password for %s in %s at %s: " % - (user, realm, host)) - return user, passwd - except KeyboardInterrupt: - print - return None, None - - -# Utility functions - -_localhost = None -def localhost(): - """Return the IP address of the magic hostname 'localhost'.""" - global _localhost - if _localhost is None: - _localhost = socket.gethostbyname('localhost') - return _localhost - -_thishost = None -def thishost(): - """Return the IP address of the current host.""" - global _thishost - if _thishost is None: - _thishost = socket.gethostbyname(socket.gethostname()) - return _thishost - -_ftperrors = None -def ftperrors(): - """Return the set of errors raised by the FTP class.""" - global _ftperrors - if _ftperrors is None: - from eventlet.green import ftplib - _ftperrors = ftplib.all_errors - return _ftperrors - - -# Utility classes - -class ftpwrapper(urllib.ftpwrapper): - """Class used by open_ftp() for cache of open FTP connections.""" - - def init(self): - from eventlet.green import ftplib - self.busy = 0 - self.ftp = ftplib.FTP() - self.ftp.connect(self.host, self.port) - self.ftp.login(self.user, self.passwd) - for dir in self.dirs: - self.ftp.cwd(dir) - - def retrfile(self, file, type): - from eventlet.green import ftplib - self.endtransfer() - if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 - else: cmd = 'TYPE ' + type; isdir = 0 - try: - self.ftp.voidcmd(cmd) - except ftplib.all_errors: - self.init() - self.ftp.voidcmd(cmd) - conn = None - if file and not isdir: - # Try to retrieve as a file - try: - cmd = 'RETR ' + file - conn = self.ftp.ntransfercmd(cmd) - except ftplib.error_perm, reason: - if str(reason)[:3] != '550': - raise IOError, ('ftp error', reason), sys.exc_info()[2] - if not conn: - # Set transfer mode to ASCII! - self.ftp.voidcmd('TYPE A') - # Try a directory listing - if file: cmd = 'LIST ' + file - else: cmd = 'LIST' - conn = self.ftp.ntransfercmd(cmd) - self.busy = 1 - # Pass back both a suitably decorated object and a retrieval length - return (addclosehook(conn[0].makefile('rb'), - self.endtransfer), conn[1]) - -# Test and time quote() and unquote() -def test1(): - s = '' - for i in range(256): s = s + chr(i) - s = s*4 - t0 = time.time() - qs = quote(s) - uqs = unquote(qs) - t1 = time.time() - if uqs != s: - print 'Wrong!' - print repr(s) - print repr(qs) - print repr(uqs) - print round(t1 - t0, 3), 'sec' - - -def reporthook(blocknum, blocksize, totalsize): - # Report during remote transfers - print "Block number: %d, Block size: %d, Total size: %d" % ( - blocknum, blocksize, totalsize) - -# Test program -def test(args=[]): - if not args: - args = [ - '/etc/passwd', - 'file:/etc/passwd', - 'file://localhost/etc/passwd', - 'ftp://ftp.gnu.org/pub/README', -## 'gopher://gopher.micro.umn.edu/1/', - 'http://www.python.org/index.html', - ] - if hasattr(URLopener, "open_https"): - args.append('https://synergy.as.cmu.edu/~geek/') - try: - for url in args: - print '-'*10, url, '-'*10 - fn, h = urlretrieve(url, None, reporthook) - print fn - if h: - print '======' - for k in h.keys(): print k + ':', h[k] - print '======' - fp = open(fn, 'rb') - data = fp.read() - del fp - if '\r' in data: - table = string.maketrans("", "") - data = data.translate(table, "\r") - print data - fn, h = None, None - print '-'*40 - finally: - urlcleanup() - -def main(): - import getopt, sys - try: - opts, args = getopt.getopt(sys.argv[1:], "th") - except getopt.error, msg: - print msg - print "Use -h for help" - return - t = 0 - for o, a in opts: - if o == '-t': - t = t + 1 - if o == '-h': - print "Usage: python urllib.py [-t] [url ...]" - print "-t runs self-test;", - print "otherwise, contents of urls are printed" - return - if t: - if t > 1: - test1() - test(args) - else: - if not args: - print "Use -h for help" - for url in args: - print urlopen(url).read(), +del patcher # Run test program when run as a script if __name__ == '__main__': diff --git a/eventlet/patcher.py b/eventlet/patcher.py index 7395a8c..b43fd93 100644 --- a/eventlet/patcher.py +++ b/eventlet/patcher.py @@ -1,7 +1,7 @@ import sys -__exclude = ('__builtins__', '__file__', '__name__') +__exclude = set(('__builtins__', '__file__', '__name__')) def inject(module_name, new_globals, *additional_modules): @@ -43,3 +43,20 @@ def import_patched(module_name, *additional_modules, **kw_additional_modules): None, *additional_modules + tuple(kw_additional_modules.items())) +def patch_function(func, *additional_modules): + """Huge hack here -- patches the specified modules for the + duration of the function call.""" + def patched(*args, **kw): + saved = {} + for name, mod in additional_modules: + saved[name] = sys.modules.get(name, None) + sys.modules[name] = mod + try: + return func(*args, **kw) + finally: + ## Put all the saved modules back + for name, mod in additional_modules: + if saved[name] is not None: + sys.modules[name] = saved[name] + return patched + \ No newline at end of file diff --git a/tests/stdlib/test_ftplib.py b/tests/stdlib/test_ftplib.py new file mode 100644 index 0000000..7317af4 --- /dev/null +++ b/tests/stdlib/test_ftplib.py @@ -0,0 +1,13 @@ +from eventlet import patcher +from eventlet.green import ftplib +from eventlet.green import threading +from eventlet.green import socket + +patcher.inject('test.test_ftplib', + globals(), + ('ftplib', ftplib), + ('socket', socket), + ('threading', threading)) + +if __name__ == "__main__": + test_main() \ No newline at end of file diff --git a/tests/stdlib/test_urllib.py b/tests/stdlib/test_urllib.py new file mode 100644 index 0000000..41f9e6a --- /dev/null +++ b/tests/stdlib/test_urllib.py @@ -0,0 +1,11 @@ +from eventlet import patcher +from eventlet.green import httplib +from eventlet.green import urllib + +patcher.inject('test.test_urllib', + globals(), + ('httplib', httplib), + ('urllib', urllib)) + +if __name__ == "__main__": + test_main() \ No newline at end of file