examples: webcrawler: urls tuple->list + more style fixes
This commit is contained in:
@@ -1,29 +1,30 @@
|
|||||||
#! /usr/bin/env python
|
#!/usr/bin/env python
|
||||||
"""
|
"""
|
||||||
This is a simple web "crawler" that fetches a bunch of urls using a pool to
|
This is a simple web "crawler" that fetches a bunch of urls using a pool to
|
||||||
control the number of outbound connections. It has as many simultaneously open
|
control the number of outbound connections. It has as many simultaneously open
|
||||||
connections as coroutines in the pool.
|
connections as coroutines in the pool.
|
||||||
|
|
||||||
The prints in the body of the fetch function are there to demonstrate that the
|
The prints in the body of the fetch function are there to demonstrate that the
|
||||||
requests are truly made in parallel.
|
requests are truly made in parallel.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
urls = (
|
|
||||||
"https://www.google.com/intl/en_ALL/images/logo.gif",
|
|
||||||
"http://python.org/images/python-logo.gif",
|
|
||||||
"http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"
|
|
||||||
)
|
|
||||||
|
|
||||||
import eventlet
|
import eventlet
|
||||||
from eventlet.green import urllib2
|
from eventlet.green import urllib2
|
||||||
|
|
||||||
|
|
||||||
|
urls = [
|
||||||
|
"https://www.google.com/intl/en_ALL/images/logo.gif",
|
||||||
|
"http://python.org/images/python-logo.gif",
|
||||||
|
"http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def fetch(url):
|
def fetch(url):
|
||||||
print "opening", url
|
print "opening", url
|
||||||
body = urllib2.urlopen(url).read()
|
body = urllib2.urlopen(url).read()
|
||||||
print "done with", url
|
print "done with", url
|
||||||
return url, body
|
return url, body
|
||||||
|
|
||||||
|
|
||||||
pool = eventlet.GreenPool(200)
|
pool = eventlet.GreenPool(200)
|
||||||
for url, body in pool.imap(fetch, urls):
|
for url, body in pool.imap(fetch, urls):
|
||||||
print "got body from", url, "of length", len(body)
|
print "got body from", url, "of length", len(body)
|
||||||
|
Reference in New Issue
Block a user