Browse Source

Change priorities for sitemap

Downgrade old EOL releases, and give currently maintained releases and
development versions a higher priority.

This requires changes to the test framework, so update the mocking. Also,
rename the URL to docs.openstack.org instead of .com.

Change-Id: I2c2c0408e203a65a9541baaf55ffe60694463975
tags/1.6.0
Andreas Jaeger 2 years ago
parent
commit
6898821aeb
2 changed files with 48 additions and 18 deletions
  1. 25
    10
      sitemap/generator/spiders/sitemap_file.py
  2. 23
    8
      test/test_sitemap_file.py

+ 25
- 10
sitemap/generator/spiders/sitemap_file.py View File

@@ -10,6 +10,7 @@
10 10
 # License for the specific language governing permissions and limitations
11 11
 # under the License.
12 12
 
13
+import re
13 14
 import time
14 15
 try:
15 16
     import urlparse
@@ -31,7 +32,8 @@ class SitemapItem(item.Item):
31 32
 
32 33
 class SitemapSpider(spiders.CrawlSpider):
33 34
     name = 'sitemap'
34
-    old_releases = tuple(["/%s" % old_release for old_release in [
35
+
36
+    EOL_SERIES = [
35 37
         'austin',
36 38
         'bexar',
37 39
         'cactus',
@@ -44,10 +46,16 @@ class SitemapSpider(spiders.CrawlSpider):
44 46
         'juno',
45 47
         'kilo',
46 48
         'liberty',
47
-        'mitaka',
49
+        'mitaka'
50
+    ]
51
+    EOL_RELEASES_PAT = re.compile('^/(' + '|'.join(EOL_SERIES) + ')/')
52
+    MAINT_SERIES = [
48 53
         'newton',
49
-        'ocata'
50
-    ]])
54
+        'ocata',
55
+        'pike'
56
+    ]
57
+    MAINT_RELEASES_PAT = re.compile('^/(' + '|'.join(MAINT_SERIES) + ')/')
58
+    LATEST_PAT = re.compile('^/latest/')
51 59
 
52 60
     rules = [
53 61
         spiders.Rule(
@@ -62,9 +70,6 @@ class SitemapSpider(spiders.CrawlSpider):
62 70
                 deny=[
63 71
                     r'/trunk/',
64 72
                     r'/draft/',
65
-                    r'/api/',
66
-                    r'/juno/',
67
-                    r'/icehouse/'
68 73
                 ]
69 74
             ),
70 75
             follow=True, callback='parse_item'
@@ -86,11 +91,21 @@ class SitemapSpider(spiders.CrawlSpider):
86 91
         item['loc'] = response.url
87 92
 
88 93
         path = urlparse.urlsplit(response.url).path
89
-        if path.startswith(self.old_releases):
90
-            # weekly changefrequency and lower priority for old files
91
-            item['priority'] = '0.5'
94
+
95
+        if self.MAINT_RELEASES_PAT.match(path):
96
+            # weekly changefrequency and highest prio for maintained release
97
+            item['priority'] = '1.0'
92 98
             item['changefreq'] = 'weekly'
99
+        elif self.LATEST_PAT.match(path):
100
+            # daily changefrequency and high priority for current files
101
+            item['priority'] = '0.8'
102
+            item['changefreq'] = 'daily'
103
+        elif self.EOL_RELEASES_PAT.match(path):
104
+            # yearly changefrequency and lowest priority for old stable files
105
+            item['priority'] = '0.1'
106
+            item['changefreq'] = 'yearly'
93 107
         else:
108
+            # These are unversioned documents
94 109
             # daily changefrequency and highest priority for current files
95 110
             item['priority'] = '1.0'
96 111
             item['changefreq'] = 'daily'

+ 23
- 8
test/test_sitemap_file.py View File

@@ -58,10 +58,17 @@ class TestSitemapSpider(unittest.TestCase):
58 58
 
59 59
     def test_parse_items_inits_sitemap(self):
60 60
         response = mock.MagicMock()
61
+        path = sitemap_file.urlparse.SplitResult(
62
+            scheme='https',
63
+            netloc='docs.openstack.org',
64
+            path='/ocata/something.html',
65
+            query='',
66
+            fragment=''
67
+        )
61 68
         with mock.patch.object(sitemap_file,
62 69
                                'SitemapItem') as mocked_sitemap_item:
63
-            with mock.patch.object(sitemap_file.urlparse,
64
-                                   'urlsplit'):
70
+            with mock.patch.object(sitemap_file.urlparse, 'urlsplit',
71
+                                   return_value=path):
65 72
                 with mock.patch.object(sitemap_file, 'time'):
66 73
                     self.spider.parse_item(response)
67 74
 
@@ -69,9 +76,17 @@ class TestSitemapSpider(unittest.TestCase):
69 76
 
70 77
     def test_parse_items_gets_path(self):
71 78
         response = mock.MagicMock()
79
+        path = sitemap_file.urlparse.SplitResult(
80
+            scheme='https',
81
+            netloc='docs.openstackorg',
82
+            path='/ocata/something.html',
83
+            query='',
84
+            fragment=''
85
+        )
72 86
         with mock.patch.object(sitemap_file, 'SitemapItem'):
73 87
             with mock.patch.object(sitemap_file.urlparse,
74
-                                   'urlsplit') as mocked_urlsplit:
88
+                                   'urlsplit',
89
+                                   return_value=path) as mocked_urlsplit:
75 90
                 with mock.patch.object(sitemap_file, 'time'):
76 91
                     self.spider.parse_item(response)
77 92
 
@@ -81,8 +96,8 @@ class TestSitemapSpider(unittest.TestCase):
81 96
         response = mock.MagicMock()
82 97
         path = sitemap_file.urlparse.SplitResult(
83 98
             scheme='https',
84
-            netloc='docs.openstack.com',
85
-            path='/mitaka',
99
+            netloc='docs.openstack.org',
100
+            path='/ocata/something.html',
86 101
             query='',
87 102
             fragment=''
88 103
         )
@@ -91,14 +106,14 @@ class TestSitemapSpider(unittest.TestCase):
91 106
             with mock.patch.object(sitemap_file, 'time'):
92 107
                 returned_item = self.spider.parse_item(response)
93 108
 
94
-        self.assertEqual('0.5', returned_item['priority'])
109
+        self.assertEqual('1.0', returned_item['priority'])
95 110
         self.assertEqual('weekly', returned_item['changefreq'])
96 111
 
97 112
     def test_parse_items_high_priority_daily_freq(self):
98 113
         response = mock.MagicMock()
99 114
         path = sitemap_file.urlparse.SplitResult(
100 115
             scheme='https',
101
-            netloc='docs.openstack.com',
116
+            netloc='docs.openstack.org',
102 117
             path='/contributor-guide',
103 118
             query='',
104 119
             fragment=''
@@ -115,7 +130,7 @@ class TestSitemapSpider(unittest.TestCase):
115 130
         response = mock.MagicMock()
116 131
         path = sitemap_file.urlparse.SplitResult(
117 132
             scheme='https',
118
-            netloc='docs.openstack.com',
133
+            netloc='docs.openstack.org',
119 134
             path='/ocata',
120 135
             query='',
121 136
             fragment=''

Loading…
Cancel
Save