[sitemap] introduce attribute to define start URLs
Change-Id: I09018a824310ef48f6b62f04f09d6fd704808119 Closes-bug: #1506601
This commit is contained in:
parent
6b5b7d530a
commit
815fcb5bb6
|
@ -24,6 +24,11 @@ The result will be available in the file ``sitemap_developer.openstack.org.xml``
|
|||
|
||||
To write log messages into a file append the parameter ``-s LOG_FILE=scrapy.log``.
|
||||
|
||||
Additional start URLs can be defined with the spider argument
|
||||
``urls``. Separate multiple URLs with ``,``.
|
||||
|
||||
$ scrapy crawl sitemap -a domain=developer.openstack.org -a urls="http://developer.openstack.org/de/api-guide/quick-start/"
|
||||
|
||||
Dependencies
|
||||
============
|
||||
|
||||
|
|
|
@ -42,13 +42,13 @@ class SitemapSpider(spiders.CrawlSpider):
|
|||
)
|
||||
]
|
||||
|
||||
def __init__(self, domain='docs.openstack.org', urls='', *args, **kwargs):
    """Set up the crawl domain and the list of start URLs.

    :param domain: host name to crawl; it becomes the only allowed domain
        and its ``index.html`` is always the first start URL.
    :param urls: comma-separated string of additional start URLs that are
        appended after the domain's index page.
    """
    super(SitemapSpider, self).__init__(*args, **kwargs)
    self.domain = domain
    self.allowed_domains = [domain]
    self.start_urls = ['http://%s/index.html' % domain]
    # ''.split(',') returns [''], so a bare split would add an empty start
    # URL whenever ``urls`` is left at its default (or has a trailing
    # comma). Strip each entry and keep only the non-blank ones.
    candidates = (url.strip() for url in (urls or '').split(','))
    self.start_urls.extend(url for url in candidates if url)
|
||||
|
||||
def parse_item(self, response):
|
||||
item = items.SitemapItem()
|
||||
|
|
Loading…
Reference in New Issue