From 44243238accf38d404b0fe7a8b1c123eda55caa1 Mon Sep 17 00:00:00 2001
From: Christian Berendt
Date: Fri, 22 Jan 2016 14:58:44 +0100
Subject: [PATCH] [sitemap] do not add empty URLs to the list of start URLs

This will solve the 'ValueError: Missing scheme in request url' exception.

Change-Id: I295b2a519668b36413ec2ba4ef3ba9dfaac6e859
---
 sitemap/generator/spiders/sitemap.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sitemap/generator/spiders/sitemap.py b/sitemap/generator/spiders/sitemap.py
index 1c364054..69e578ac 100644
--- a/sitemap/generator/spiders/sitemap.py
+++ b/sitemap/generator/spiders/sitemap.py
@@ -58,8 +58,10 @@ class SitemapSpider(spiders.CrawlSpider):
         super(SitemapSpider, self).__init__(*args, **kwargs)
         self.domain = domain
         self.allowed_domains = [domain]
-        self.start_urls = ['http://%s/index.html' % domain]
+        self.start_urls = ['http://%s' % domain]
         for url in urls.split(','):
+            if not url:
+                continue
             self.start_urls.append(url)
 
     def parse_item(self, response):