Browse Source

Use a pipeline for dib stats

I noticed that in OpenStack production we don't seem to be getting all
the stats from dib, particularly from our very remote builder.  This is
likely because some packets are lost when quickly blasting out many
small UDP packets with the stats.  A pipeline bundles the stats together
into the largest size packets it can (this has been a problem before;
see I3f68450c7164d1cf0f1f57f9a31e5dca2f72bc43).

Add some additional checks for the size stats which did not seem to be
covered by existing testing.

I also noticed that the documentation had an extra ".builder." in the
key which isn't actually there in the stats output.

Change-Id: Ib744f19385906d1e72231958d11c98f15b72d6bd
tags/3.5.0
Ian Wienand 3 months ago
parent
commit
c68dbb9636
3 changed files with 18 additions and 7 deletions
  1. 3
    3
      doc/source/operation.rst
  2. 8
    4
      nodepool/builder.py
  3. 7
    0
      nodepool/tests/unit/test_builder.py

+ 3
- 3
doc/source/operation.rst View File

@@ -294,15 +294,15 @@ Nodepool builder
294 294
    Number of image uploads to a specific provider in the cloud plus the time in
295 295
    seconds spent to upload the image.
296 296
 
297
-.. zuul:stat:: nodepool.builder.dib_image_build.<diskimage_name>.<ext>.rc
297
+.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.rc
298 298
    :type: gauge
299 299
 
300 300
    Return code of the DIB.
301 301
 
302
-.. zuul:stat:: nodepool.builder.dib_image_build.<diskimage_name>.<ext>.duration
302
+.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.duration
303 303
    :type: timer
304 304
 
305
-   Time the DIB run took.
305
+   Time the DIB run took in ms
306 306
 
307 307
 Nodepool launcher
308 308
 ~~~~~~~~~~~~~~~~~

+ 8
- 4
nodepool/builder.py View File

@@ -854,6 +854,9 @@ class BuildWorker(BaseWorker):
854 854
         build_data.builder = self._hostname
855 855
         build_data.username = diskimage.username
856 856
 
857
+        if self._statsd:
858
+            pipeline = self._statsd.pipeline()
859
+
857 860
         if self._zk.didLoseConnection:
858 861
             self.log.info("ZooKeeper lost while building %s" % diskimage.name)
859 862
             self._zk.resetLostFlag()
@@ -881,16 +884,17 @@ class BuildWorker(BaseWorker):
881 884
                     size = os.stat("%s.%s" % (filename, ext)).st_blocks * 512
882 885
                     self.log.debug("%s created image %s.%s (size: %d) " %
883 886
                                    (diskimage.name, filename, ext, size))
884
-                    self._statsd.gauge(key, size)
887
+                    pipeline.gauge(key, size)
885 888
 
886 889
         if self._statsd:
887 890
             # report result to statsd
888 891
             for ext in img_types.split(','):
889 892
                 key_base = 'nodepool.dib_image_build.%s.%s' % (
890 893
                     diskimage.name, ext)
891
-                self._statsd.gauge(key_base + '.rc', rc)
892
-                self._statsd.timing(key_base + '.duration',
893
-                                    int(build_time * 1000))
894
+                pipeline.gauge(key_base + '.rc', rc)
895
+                pipeline.timing(key_base + '.duration',
896
+                                int(build_time * 1000))
897
+            pipeline.send()
894 898
 
895 899
         return build_data
896 900
 

+ 7
- 0
nodepool/tests/unit/test_builder.py View File

@@ -326,6 +326,8 @@ class TestNodePoolBuilder(tests.DBTestCase):
326 326
                                 '0', 'g')
327 327
         self.assertReportedStat('nodepool.dib_image_build.'
328 328
                                 'fake-image.tar.duration', None, 'ms')
329
+        self.assertReportedStat('nodepool.dib_image_build.'
330
+                                'fake-image.tar.size', '4096', 'g')
329 331
 
330 332
     def test_diskimage_build_formats(self):
331 333
         configfile = self.setup_config('node_diskimage_formats.yaml')
@@ -336,6 +338,11 @@ class TestNodePoolBuilder(tests.DBTestCase):
336 338
 
337 339
         self.assertEqual(build_default._formats, ['qcow2'])
338 340
         self.assertEqual(build_vhd._formats, ['vhd'])
341
+        self.assertReportedStat('nodepool.dib_image_build.'
342
+                                'fake-image-default-format.qcow2.size',
343
+                                '4096', 'g')
344
+        self.assertReportedStat('nodepool.dib_image_build.'
345
+                                'fake-image-vhd.vhd.size', '4096', 'g')
339 346
 
340 347
     @mock.patch('select.poll')
341 348
     def test_diskimage_build_timeout(self, mock_poll):

Loading…
Cancel
Save