Browse Source

Change priorities for sitemap

Downgrade old EOL releases, and give currently maintained releases and
development versions a higher priority.

This needs changes to the test framework: update the mocking. Also, rename
the URL in the tests to docs.openstack.org instead of docs.openstack.com.

Change-Id: I2c2c0408e203a65a9541baaf55ffe60694463975
tags/1.6.0
Andreas Jaeger 2 years ago
parent
commit
6898821aeb
2 changed files with 48 additions and 18 deletions
  1. 25
    10
      sitemap/generator/spiders/sitemap_file.py
  2. 23
    8
      test/test_sitemap_file.py

+ 25
- 10
sitemap/generator/spiders/sitemap_file.py View File

@@ -10,6 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.

import re
import time
try:
import urlparse
@@ -31,7 +32,8 @@ class SitemapItem(item.Item):

class SitemapSpider(spiders.CrawlSpider):
name = 'sitemap'
old_releases = tuple(["/%s" % old_release for old_release in [

EOL_SERIES = [
'austin',
'bexar',
'cactus',
@@ -44,10 +46,16 @@ class SitemapSpider(spiders.CrawlSpider):
'juno',
'kilo',
'liberty',
'mitaka',
'mitaka'
]
EOL_RELEASES_PAT = re.compile('^/(' + '|'.join(EOL_SERIES) + ')/')
MAINT_SERIES = [
'newton',
'ocata'
]])
'ocata',
'pike'
]
MAINT_RELEASES_PAT = re.compile('^/(' + '|'.join(MAINT_SERIES) + ')/')
LATEST_PAT = re.compile('^/latest/')

rules = [
spiders.Rule(
@@ -62,9 +70,6 @@ class SitemapSpider(spiders.CrawlSpider):
deny=[
r'/trunk/',
r'/draft/',
r'/api/',
r'/juno/',
r'/icehouse/'
]
),
follow=True, callback='parse_item'
@@ -86,11 +91,21 @@ class SitemapSpider(spiders.CrawlSpider):
item['loc'] = response.url

path = urlparse.urlsplit(response.url).path
if path.startswith(self.old_releases):
# weekly changefrequency and lower priority for old files
item['priority'] = '0.5'

if self.MAINT_RELEASES_PAT.match(path):
# weekly changefrequency and highest prio for maintained release
item['priority'] = '1.0'
item['changefreq'] = 'weekly'
elif self.LATEST_PAT.match(path):
# daily changefrequency and high priority for current files
item['priority'] = '0.8'
item['changefreq'] = 'daily'
elif self.EOL_RELEASES_PAT.match(path):
# yearly changefrequency and lowest priority for old stable files
item['priority'] = '0.1'
item['changefreq'] = 'yearly'
else:
# These are unversioned documents
# daily changefrequency and highest priority for current files
item['priority'] = '1.0'
item['changefreq'] = 'daily'

+ 23
- 8
test/test_sitemap_file.py View File

@@ -58,10 +58,17 @@ class TestSitemapSpider(unittest.TestCase):

def test_parse_items_inits_sitemap(self):
response = mock.MagicMock()
path = sitemap_file.urlparse.SplitResult(
scheme='https',
netloc='docs.openstack.org',
path='/ocata/something.html',
query='',
fragment=''
)
with mock.patch.object(sitemap_file,
'SitemapItem') as mocked_sitemap_item:
with mock.patch.object(sitemap_file.urlparse,
'urlsplit'):
with mock.patch.object(sitemap_file.urlparse, 'urlsplit',
return_value=path):
with mock.patch.object(sitemap_file, 'time'):
self.spider.parse_item(response)

@@ -69,9 +76,17 @@ class TestSitemapSpider(unittest.TestCase):

def test_parse_items_gets_path(self):
response = mock.MagicMock()
path = sitemap_file.urlparse.SplitResult(
scheme='https',
netloc='docs.openstackorg',
path='/ocata/something.html',
query='',
fragment=''
)
with mock.patch.object(sitemap_file, 'SitemapItem'):
with mock.patch.object(sitemap_file.urlparse,
'urlsplit') as mocked_urlsplit:
'urlsplit',
return_value=path) as mocked_urlsplit:
with mock.patch.object(sitemap_file, 'time'):
self.spider.parse_item(response)

@@ -81,8 +96,8 @@ class TestSitemapSpider(unittest.TestCase):
response = mock.MagicMock()
path = sitemap_file.urlparse.SplitResult(
scheme='https',
netloc='docs.openstack.com',
path='/mitaka',
netloc='docs.openstack.org',
path='/ocata/something.html',
query='',
fragment=''
)
@@ -91,14 +106,14 @@ class TestSitemapSpider(unittest.TestCase):
with mock.patch.object(sitemap_file, 'time'):
returned_item = self.spider.parse_item(response)

self.assertEqual('0.5', returned_item['priority'])
self.assertEqual('1.0', returned_item['priority'])
self.assertEqual('weekly', returned_item['changefreq'])

def test_parse_items_high_priority_daily_freq(self):
response = mock.MagicMock()
path = sitemap_file.urlparse.SplitResult(
scheme='https',
netloc='docs.openstack.com',
netloc='docs.openstack.org',
path='/contributor-guide',
query='',
fragment=''
@@ -115,7 +130,7 @@ class TestSitemapSpider(unittest.TestCase):
response = mock.MagicMock()
path = sitemap_file.urlparse.SplitResult(
scheme='https',
netloc='docs.openstack.com',
netloc='docs.openstack.org',
path='/ocata',
query='',
fragment=''

Loading…
Cancel
Save