Move SitemapItem class into generator.spiders.sitemap_file

This fixes the following error, raised by the spider's `from sitemap.generator import items` import:

ImportError: No module named sitemap.generator

Change-Id: I449f0fb0c87613c36447cca94f4f5857f3d31afa
This commit is contained in:
Christian Berendt 2016-10-06 13:33:43 +02:00
parent 6b669e8778
commit 92a4fb6527
2 changed files with 10 additions and 23 deletions

View File

@ -1,21 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import scrapy
class SitemapItem(scrapy.item.Item):
    """Item holding the data collected for one sitemap entry.

    Every attribute is declared as a scrapy ``Field``; values are read and
    written with dict-style access (``item['loc'] = ...``).
    """

    # NOTE(review): the field names presumably mirror the <url> child
    # elements of the sitemap protocol -- confirm against the exporter.
    loc = scrapy.item.Field()
    lastmod = scrapy.item.Field()
    priority = scrapy.item.Field()
    changefreq = scrapy.item.Field()

View File

@ -13,9 +13,17 @@
import time import time
import urlparse import urlparse
from scrapy import item
from scrapy.linkextractors import LinkExtractor from scrapy.linkextractors import LinkExtractor
from scrapy import spiders from scrapy import spiders
from sitemap.generator import items
class SitemapItem(item.Item):
    """Item holding the data collected for one sitemap entry.

    Every attribute is declared as a scrapy ``Field``; values are read and
    written with dict-style access. ``SitemapSpider.parse_item`` creates
    one instance per crawled response and stores ``response.url`` in
    ``loc``.
    """

    # NOTE(review): the field names presumably mirror the <url> child
    # elements of the sitemap protocol -- confirm against the exporter.
    loc = item.Field()
    lastmod = item.Field()
    priority = item.Field()
    changefreq = item.Field()
class SitemapSpider(spiders.CrawlSpider): class SitemapSpider(spiders.CrawlSpider):
@ -68,7 +76,7 @@ class SitemapSpider(spiders.CrawlSpider):
self.start_urls.append(url) self.start_urls.append(url)
def parse_item(self, response): def parse_item(self, response):
item = items.SitemapItem() item = SitemapItem()
item['loc'] = response.url item['loc'] = response.url
path = urlparse.urlsplit(response.url).path path = urlparse.urlsplit(response.url).path