Browse Source

[sitemap] set higher priority for files of the current release

Change-Id: I9dbaa787354582f2f766fcce58aff95766d242c7
tags/0.33.0
Christian Berendt 3 years ago
parent
commit
8ebb376bb8
1 changed file with 20 additions and 16 deletions
  1. 20
    16
      sitemap/generator/spiders/sitemap.py

+ 20
- 16
sitemap/generator/spiders/sitemap.py View File

@@ -10,7 +10,6 @@
# License for the specific language governing permissions and limitations
# under the License.

import posixpath
import time
import urlparse

@@ -21,6 +20,19 @@ from scrapy import spiders

class SitemapSpider(spiders.CrawlSpider):
name = 'sitemap'
old_releases = tuple(["/%s" % old_release for old_release in [
'austin',
'bexar',
'cactus',
'diablo',
'essex',
'folsom',
'grizzly',
'havana',
'icehouse',
'juno',
'kilo'
]])

rules = [
spiders.Rule(
@@ -52,25 +64,17 @@ class SitemapSpider(spiders.CrawlSpider):

def parse_item(self, response):
item = items.SitemapItem()
item['priority'] = '0.5'
item['changefreq'] = 'daily'
item['loc'] = response.url

path = urlparse.urlsplit(response.url).path
filename = posixpath.basename(path)

if filename == 'index.html' or filename == '':
if path.startswith(self.old_releases):
# weekly changefrequency and lower priority for old files
item['priority'] = '0.5'
item['changefreq'] = 'weekly'
else:
# daily changefrequency and highest priority for current files
item['priority'] = '1.0'

weekly = [
'juno',
'icehouse',
'havana'
]

for entry in weekly:
if path.startswith("/%s" % entry):
item['changefreq'] = 'weekly'
item['changefreq'] = 'daily'

if 'Last-Modified' in response.headers:
timestamp = response.headers['Last-Modified']

Loading…
Cancel
Save