From 5d4d1668824c255ce70129be33834800c3000a8a Mon Sep 17 00:00:00 2001
From: Ryan Williams
Date: Mon, 29 Mar 2010 00:19:23 -0700
Subject: [PATCH] Updated web crawler example on front page.

---
 doc/real_index.html | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/doc/real_index.html b/doc/real_index.html
index 62ca706..98d4927 100644
--- a/doc/real_index.html
+++ b/doc/real_index.html
@@ -68,29 +68,22 @@ easy_install eventlet

Web Crawler Example

This is a simple web "crawler" that fetches a bunch of urls using a coroutine pool. It has as much concurrency (i.e. pages being fetched simultaneously) as coroutines in the pool; a standalone sketch of that pool-size/concurrency relationship follows the patch below.

+
urls = ["http://www.google.com/intl/en_ALL/images/logo.gif",
-       "http://wiki.secondlife.com/w/images/secondlife.jpg",
-       "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"]
+     "https://wiki.secondlife.com/w/images/secondlife.jpg",
+     "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"]
 
-import time
-from eventlet import coros
-
-# this imports a special version of the urllib2 module that uses non-blocking IO
+import eventlet
 from eventlet.green import urllib2
 
 def fetch(url):
-    print "%s fetching %s" % (time.asctime(), url)
-    data = urllib2.urlopen(url)
-    print "%s fetched %s" % (time.asctime(), data)
 
-pool = coros.CoroutinePool(max_size=4)
-waiters = []
-for url in urls:
-    waiters.append(pool.execute(fetch, url))
+  return urllib2.urlopen(url).read()
 
-# wait for all the coroutines to come back before exiting the process
-for waiter in waiters:
-    waiter.wait()
+pool = eventlet.GreenPool()
+
+for body in pool.imap(fetch, urls):
+  print "got body", len(body)
 

Stats
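
As a companion to the updated example, here is a minimal standalone sketch of the point made in the paragraph above: the pool size caps how many pages are fetched simultaneously. It assumes the same Python 2 / eventlet era as the patch (eventlet.green.urllib2 and GreenPool's size argument); the pool size of 2 is arbitrary and chosen only to illustrate the cap.

import eventlet
from eventlet.green import urllib2   # cooperative, non-blocking drop-in for urllib2

urls = ["http://www.google.com/intl/en_ALL/images/logo.gif",
        "https://wiki.secondlife.com/w/images/secondlife.jpg",
        "http://us.i1.yimg.com/us.yimg.com/i/ww/beta/y3.gif"]

def fetch(url):
    return urllib2.urlopen(url).read()

# A GreenPool never runs more greenthreads at once than its size,
# so at most two of the three URLs are fetched simultaneously here.
pool = eventlet.GreenPool(2)
for body in pool.imap(fetch, urls):
    print "got body", len(body)

With the size set to len(urls) or larger this behaves like the front-page example; shrinking it trades concurrency for a lighter load on the remote servers.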