From c68dbb9636c8c1424f67e0920132d4047bcafc99 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Thu, 21 Feb 2019 07:56:30 +1100 Subject: [PATCH] Use a pipeline for dib stats I noticed in OpenStack production we don't seem to be getting all the stats from dib, particularly from our very remote builder. This is likely because there is some packet loss quickly blasting out small UDP packets with the stats. A pipeline bundles the stats together into the largest size packets it can (this has been a problem before; see I3f68450c7164d1cf0f1f57f9a31e5dca2f72bc43). Add some additional checks for the size stats which did not seem to be covered by existing testing. I also noticed that the documentation had an extra ".builder." in the key which isn't actually there in the stats output. Change-Id: Ib744f19385906d1e72231958d11c98f15b72d6bd --- doc/source/operation.rst | 6 +++--- nodepool/builder.py | 12 ++++++++---- nodepool/tests/unit/test_builder.py | 7 +++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/doc/source/operation.rst b/doc/source/operation.rst index 470e9f9bf..6e2b51eea 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -294,15 +294,15 @@ Nodepool builder Number of image uploads to a specific provider in the cloud plus the time in seconds spent to upload the image. -.. zuul:stat:: nodepool.builder.dib_image_build...rc +.. zuul:stat:: nodepool.dib_image_build...rc :type: gauge Return code of the DIB. -.. zuul:stat:: nodepool.builder.dib_image_build...duration +.. zuul:stat:: nodepool.dib_image_build...duration :type: timer - Time the DIB run took. + Time the DIB run took in ms Nodepool launcher ~~~~~~~~~~~~~~~~~ diff --git a/nodepool/builder.py b/nodepool/builder.py index 196e6e052..2f6f4de20 100755 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -854,6 +854,9 @@ class BuildWorker(BaseWorker): build_data.builder = self._hostname build_data.username = diskimage.username + if self._statsd: + pipeline = self._statsd.pipeline() + if self._zk.didLoseConnection: self.log.info("ZooKeeper lost while building %s" % diskimage.name) self._zk.resetLostFlag() @@ -881,16 +884,17 @@ class BuildWorker(BaseWorker): size = os.stat("%s.%s" % (filename, ext)).st_blocks * 512 self.log.debug("%s created image %s.%s (size: %d) " % (diskimage.name, filename, ext, size)) - self._statsd.gauge(key, size) + pipeline.gauge(key, size) if self._statsd: # report result to statsd for ext in img_types.split(','): key_base = 'nodepool.dib_image_build.%s.%s' % ( diskimage.name, ext) - self._statsd.gauge(key_base + '.rc', rc) - self._statsd.timing(key_base + '.duration', - int(build_time * 1000)) + pipeline.gauge(key_base + '.rc', rc) + pipeline.timing(key_base + '.duration', + int(build_time * 1000)) + pipeline.send() return build_data diff --git a/nodepool/tests/unit/test_builder.py b/nodepool/tests/unit/test_builder.py index 1379ec0f8..9a7dad4da 100644 --- a/nodepool/tests/unit/test_builder.py +++ b/nodepool/tests/unit/test_builder.py @@ -326,6 +326,8 @@ class TestNodePoolBuilder(tests.DBTestCase): '0', 'g') self.assertReportedStat('nodepool.dib_image_build.' 'fake-image.tar.duration', None, 'ms') + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image.tar.size', '4096', 'g') def test_diskimage_build_formats(self): configfile = self.setup_config('node_diskimage_formats.yaml') @@ -336,6 +338,11 @@ class TestNodePoolBuilder(tests.DBTestCase): self.assertEqual(build_default._formats, ['qcow2']) self.assertEqual(build_vhd._formats, ['vhd']) + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image-default-format.qcow2.size', + '4096', 'g') + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image-vhd.vhd.size', '4096', 'g') @mock.patch('select.poll') def test_diskimage_build_timeout(self, mock_poll):