Change priorities for sitemap
Downgrade old EOL releases and give currently maintained releases and development versions a higher priority. This needs changes to the test framework: update the mocking. Also, rename the URL to docs.openstack.org instead of docs.openstack.com. Change-Id: I2c2c0408e203a65a9541baaf55ffe60694463975
This commit is contained in:
parent
f87192cc77
commit
6898821aeb
|
@ -10,6 +10,7 @@
|
||||||
# License for the specific language governing permissions and limitations
|
# License for the specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
try:
|
try:
|
||||||
import urlparse
|
import urlparse
|
||||||
|
@ -31,7 +32,8 @@ class SitemapItem(item.Item):
|
||||||
|
|
||||||
class SitemapSpider(spiders.CrawlSpider):
|
class SitemapSpider(spiders.CrawlSpider):
|
||||||
name = 'sitemap'
|
name = 'sitemap'
|
||||||
old_releases = tuple(["/%s" % old_release for old_release in [
|
|
||||||
|
EOL_SERIES = [
|
||||||
'austin',
|
'austin',
|
||||||
'bexar',
|
'bexar',
|
||||||
'cactus',
|
'cactus',
|
||||||
|
@ -44,10 +46,16 @@ class SitemapSpider(spiders.CrawlSpider):
|
||||||
'juno',
|
'juno',
|
||||||
'kilo',
|
'kilo',
|
||||||
'liberty',
|
'liberty',
|
||||||
'mitaka',
|
'mitaka'
|
||||||
|
]
|
||||||
|
EOL_RELEASES_PAT = re.compile('^/(' + '|'.join(EOL_SERIES) + ')/')
|
||||||
|
MAINT_SERIES = [
|
||||||
'newton',
|
'newton',
|
||||||
'ocata'
|
'ocata',
|
||||||
]])
|
'pike'
|
||||||
|
]
|
||||||
|
MAINT_RELEASES_PAT = re.compile('^/(' + '|'.join(MAINT_SERIES) + ')/')
|
||||||
|
LATEST_PAT = re.compile('^/latest/')
|
||||||
|
|
||||||
rules = [
|
rules = [
|
||||||
spiders.Rule(
|
spiders.Rule(
|
||||||
|
@ -62,9 +70,6 @@ class SitemapSpider(spiders.CrawlSpider):
|
||||||
deny=[
|
deny=[
|
||||||
r'/trunk/',
|
r'/trunk/',
|
||||||
r'/draft/',
|
r'/draft/',
|
||||||
r'/api/',
|
|
||||||
r'/juno/',
|
|
||||||
r'/icehouse/'
|
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
follow=True, callback='parse_item'
|
follow=True, callback='parse_item'
|
||||||
|
@ -86,11 +91,21 @@ class SitemapSpider(spiders.CrawlSpider):
|
||||||
item['loc'] = response.url
|
item['loc'] = response.url
|
||||||
|
|
||||||
path = urlparse.urlsplit(response.url).path
|
path = urlparse.urlsplit(response.url).path
|
||||||
if path.startswith(self.old_releases):
|
|
||||||
# weekly changefrequency and lower priority for old files
|
if self.MAINT_RELEASES_PAT.match(path):
|
||||||
item['priority'] = '0.5'
|
# weekly changefrequency and highest prio for maintained release
|
||||||
|
item['priority'] = '1.0'
|
||||||
item['changefreq'] = 'weekly'
|
item['changefreq'] = 'weekly'
|
||||||
|
elif self.LATEST_PAT.match(path):
|
||||||
|
# daily changefrequency and high priority for current files
|
||||||
|
item['priority'] = '0.8'
|
||||||
|
item['changefreq'] = 'daily'
|
||||||
|
elif self.EOL_RELEASES_PAT.match(path):
|
||||||
|
# yearly changefrequency and lowest priority for old stable files
|
||||||
|
item['priority'] = '0.1'
|
||||||
|
item['changefreq'] = 'yearly'
|
||||||
else:
|
else:
|
||||||
|
# These are unversioned documents
|
||||||
# daily changefrequency and highest priority for current files
|
# daily changefrequency and highest priority for current files
|
||||||
item['priority'] = '1.0'
|
item['priority'] = '1.0'
|
||||||
item['changefreq'] = 'daily'
|
item['changefreq'] = 'daily'
|
||||||
|
|
|
@ -58,10 +58,17 @@ class TestSitemapSpider(unittest.TestCase):
|
||||||
|
|
||||||
def test_parse_items_inits_sitemap(self):
|
def test_parse_items_inits_sitemap(self):
|
||||||
response = mock.MagicMock()
|
response = mock.MagicMock()
|
||||||
|
path = sitemap_file.urlparse.SplitResult(
|
||||||
|
scheme='https',
|
||||||
|
netloc='docs.openstack.org',
|
||||||
|
path='/ocata/something.html',
|
||||||
|
query='',
|
||||||
|
fragment=''
|
||||||
|
)
|
||||||
with mock.patch.object(sitemap_file,
|
with mock.patch.object(sitemap_file,
|
||||||
'SitemapItem') as mocked_sitemap_item:
|
'SitemapItem') as mocked_sitemap_item:
|
||||||
with mock.patch.object(sitemap_file.urlparse,
|
with mock.patch.object(sitemap_file.urlparse, 'urlsplit',
|
||||||
'urlsplit'):
|
return_value=path):
|
||||||
with mock.patch.object(sitemap_file, 'time'):
|
with mock.patch.object(sitemap_file, 'time'):
|
||||||
self.spider.parse_item(response)
|
self.spider.parse_item(response)
|
||||||
|
|
||||||
|
@ -69,9 +76,17 @@ class TestSitemapSpider(unittest.TestCase):
|
||||||
|
|
||||||
def test_parse_items_gets_path(self):
|
def test_parse_items_gets_path(self):
|
||||||
response = mock.MagicMock()
|
response = mock.MagicMock()
|
||||||
|
path = sitemap_file.urlparse.SplitResult(
|
||||||
|
scheme='https',
|
||||||
|
netloc='docs.openstackorg',
|
||||||
|
path='/ocata/something.html',
|
||||||
|
query='',
|
||||||
|
fragment=''
|
||||||
|
)
|
||||||
with mock.patch.object(sitemap_file, 'SitemapItem'):
|
with mock.patch.object(sitemap_file, 'SitemapItem'):
|
||||||
with mock.patch.object(sitemap_file.urlparse,
|
with mock.patch.object(sitemap_file.urlparse,
|
||||||
'urlsplit') as mocked_urlsplit:
|
'urlsplit',
|
||||||
|
return_value=path) as mocked_urlsplit:
|
||||||
with mock.patch.object(sitemap_file, 'time'):
|
with mock.patch.object(sitemap_file, 'time'):
|
||||||
self.spider.parse_item(response)
|
self.spider.parse_item(response)
|
||||||
|
|
||||||
|
@ -81,8 +96,8 @@ class TestSitemapSpider(unittest.TestCase):
|
||||||
response = mock.MagicMock()
|
response = mock.MagicMock()
|
||||||
path = sitemap_file.urlparse.SplitResult(
|
path = sitemap_file.urlparse.SplitResult(
|
||||||
scheme='https',
|
scheme='https',
|
||||||
netloc='docs.openstack.com',
|
netloc='docs.openstack.org',
|
||||||
path='/mitaka',
|
path='/ocata/something.html',
|
||||||
query='',
|
query='',
|
||||||
fragment=''
|
fragment=''
|
||||||
)
|
)
|
||||||
|
@ -91,14 +106,14 @@ class TestSitemapSpider(unittest.TestCase):
|
||||||
with mock.patch.object(sitemap_file, 'time'):
|
with mock.patch.object(sitemap_file, 'time'):
|
||||||
returned_item = self.spider.parse_item(response)
|
returned_item = self.spider.parse_item(response)
|
||||||
|
|
||||||
self.assertEqual('0.5', returned_item['priority'])
|
self.assertEqual('1.0', returned_item['priority'])
|
||||||
self.assertEqual('weekly', returned_item['changefreq'])
|
self.assertEqual('weekly', returned_item['changefreq'])
|
||||||
|
|
||||||
def test_parse_items_high_priority_daily_freq(self):
|
def test_parse_items_high_priority_daily_freq(self):
|
||||||
response = mock.MagicMock()
|
response = mock.MagicMock()
|
||||||
path = sitemap_file.urlparse.SplitResult(
|
path = sitemap_file.urlparse.SplitResult(
|
||||||
scheme='https',
|
scheme='https',
|
||||||
netloc='docs.openstack.com',
|
netloc='docs.openstack.org',
|
||||||
path='/contributor-guide',
|
path='/contributor-guide',
|
||||||
query='',
|
query='',
|
||||||
fragment=''
|
fragment=''
|
||||||
|
@ -115,7 +130,7 @@ class TestSitemapSpider(unittest.TestCase):
|
||||||
response = mock.MagicMock()
|
response = mock.MagicMock()
|
||||||
path = sitemap_file.urlparse.SplitResult(
|
path = sitemap_file.urlparse.SplitResult(
|
||||||
scheme='https',
|
scheme='https',
|
||||||
netloc='docs.openstack.com',
|
netloc='docs.openstack.org',
|
||||||
path='/ocata',
|
path='/ocata',
|
||||||
query='',
|
query='',
|
||||||
fragment=''
|
fragment=''
|
||||||
|
|
Loading…
Reference in New Issue