examples: webcrawler: urls tuple->list + more style fixes

This commit is contained in:
Sergey Shepelev
2013-01-04 04:26:31 +04:00
parent 2470941639
commit 59cc495565

View File

@@ -1,29 +1,30 @@
#!/usr/bin/env python
"""
This is a simple web "crawler" that fetches a bunch of urls using a pool to
control the number of outbound connections. It has as many simultaneously open
connections as coroutines in the pool.
The prints in the body of the fetch function are there to demonstrate that the
requests are truly made in parallel.
"""
import eventlet
from eventlet.green import urllib2

# Demo targets: a few small image files on well-known hosts.
urls = [
    "https://www.google.com/intl/en_ALL/images/logo.gif",
    "http://python.org/images/python-logo.gif",
    "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif",
]
def fetch(url): def fetch(url):
print "opening", url print "opening", url
body = urllib2.urlopen(url).read() body = urllib2.urlopen(url).read()
print "done with", url print "done with", url
return url, body return url, body
pool = eventlet.GreenPool(200) pool = eventlet.GreenPool(200)
for url, body in pool.imap(fetch, urls): for url, body in pool.imap(fetch, urls):
print "got body from", url, "of length", len(body) print "got body from", url, "of length", len(body)