[sitemap] set higher priority for files of the current release
Change-Id: I9dbaa787354582f2f766fcce58aff95766d242c7
This commit is contained in:
parent
b60e29dcf1
commit
8ebb376bb8
|
@ -10,7 +10,6 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import posixpath
|
||||
import time
|
||||
import urlparse
|
||||
|
||||
|
@ -21,6 +20,19 @@ from scrapy import spiders
|
|||
|
||||
class SitemapSpider(spiders.CrawlSpider):
|
||||
name = 'sitemap'
|
||||
old_releases = tuple(["/%s" % old_release for old_release in [
|
||||
'austin',
|
||||
'bexar',
|
||||
'cactus',
|
||||
'diablo',
|
||||
'essex',
|
||||
'folsom',
|
||||
'grizzly',
|
||||
'havana',
|
||||
'icehouse',
|
||||
'juno',
|
||||
'kilo'
|
||||
]])
|
||||
|
||||
rules = [
|
||||
spiders.Rule(
|
||||
|
@ -52,25 +64,17 @@ class SitemapSpider(spiders.CrawlSpider):
|
|||
|
||||
def parse_item(self, response):
|
||||
item = items.SitemapItem()
|
||||
item['priority'] = '0.5'
|
||||
item['changefreq'] = 'daily'
|
||||
item['loc'] = response.url
|
||||
|
||||
path = urlparse.urlsplit(response.url).path
|
||||
filename = posixpath.basename(path)
|
||||
|
||||
if filename == 'index.html' or filename == '':
|
||||
if path.startswith(self.old_releases):
|
||||
# weekly change frequency and lower priority for old files
|
||||
item['priority'] = '0.5'
|
||||
item['changefreq'] = 'weekly'
|
||||
else:
|
||||
# daily change frequency and highest priority for current files
|
||||
item['priority'] = '1.0'
|
||||
|
||||
weekly = [
|
||||
'juno',
|
||||
'icehouse',
|
||||
'havana'
|
||||
]
|
||||
|
||||
for entry in weekly:
|
||||
if path.startswith("/%s" % entry):
|
||||
item['changefreq'] = 'weekly'
|
||||
item['changefreq'] = 'daily'
|
||||
|
||||
if 'Last-Modified' in response.headers:
|
||||
timestamp = response.headers['Last-Modified']
|
||||
|
|
Loading…
Reference in New Issue