From b9d280acce708176ab41541b638b0ccdbc3f1027 Mon Sep 17 00:00:00 2001 From: Andreas Jaeger Date: Tue, 24 Mar 2020 11:13:33 +0100 Subject: [PATCH] Update sitemap for Python3 Fix sitemap code to work under Python3. Change-Id: I7aad21f612047ce264b93460f7604b63fa951cab --- sitemap/generator/pipelines.py | 3 ++- sitemap/generator/spiders/sitemap_file.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sitemap/generator/pipelines.py b/sitemap/generator/pipelines.py index 50f385b6..dad61af8 100644 --- a/sitemap/generator/pipelines.py +++ b/sitemap/generator/pipelines.py @@ -83,7 +83,8 @@ class ExportSitemap(object): % spider.domain)) with open(os.path.join(os.getcwd(), "sitemap_%s.xml" % spider.domain), 'w') as pretty: - pretty.write(lxml.etree.tostring(tree, pretty_print=True)) + pretty.write(lxml.etree.tostring(tree, pretty_print=True, + encoding='unicode')) def process_item(self, item, spider): self.exporter.export_item(item) diff --git a/sitemap/generator/spiders/sitemap_file.py b/sitemap/generator/spiders/sitemap_file.py index dfbc41e2..8229540d 100644 --- a/sitemap/generator/spiders/sitemap_file.py +++ b/sitemap/generator/spiders/sitemap_file.py @@ -110,6 +110,7 @@ class SitemapSpider(spiders.CrawlSpider): timestamp = response.headers['Last-Modified'] else: timestamp = response.headers['Date'] - lastmod = time.strptime(timestamp, "%a, %d %b %Y %H:%M:%S %Z") + lastmod = time.strptime(timestamp.decode("utf-8"), + "%a, %d %b %Y %H:%M:%S %Z") item['lastmod'] = time.strftime("%Y-%m-%dT%H:%M:%S%z", lastmod) return item