[sitemap] do not add empty URLs to the list of start URLs
This will solve the 'ValueError: Missing scheme in request url' exception.

Change-Id: I295b2a519668b36413ec2ba4ef3ba9dfaac6e859
This commit is contained in:
parent
b4f4013423
commit
44243238ac
|
@@ -58,8 +58,10 @@ class SitemapSpider(spiders.CrawlSpider):
|
||||||
super(SitemapSpider, self).__init__(*args, **kwargs)
|
super(SitemapSpider, self).__init__(*args, **kwargs)
|
||||||
self.domain = domain
|
self.domain = domain
|
||||||
self.allowed_domains = [domain]
|
self.allowed_domains = [domain]
|
||||||
self.start_urls = ['http://%s/index.html' % domain]
|
self.start_urls = ['http://%s' % domain]
|
||||||
for url in urls.split(','):
|
for url in urls.split(','):
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
self.start_urls.append(url)
|
self.start_urls.append(url)
|
||||||
|
|
||||||
def parse_item(self, response):
|
def parse_item(self, response):
|
||||||
|
|
Loading…
Reference in New Issue