From 6fa73eac2696e6401f2b5d4300a4bc5cb5e83d7d Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Fri, 22 Feb 2019 11:27:51 +1100 Subject: [PATCH] Update dib stats This updates dib stats after creating a dashboard to use them. Firstly, the individual return codes and runtime for each image type are unnecessary, because they all come from the same invocation of dib. While it is definitely useful to track the size of each output image, the overall status for a build is only a single value. This moves these duplicated values to ".status.". Unfortunately, there's really no way to say "what was the time of the last non-null value" in grafana+graphite [1]. This means you can't do something useful like show a singlestat of the relative time of the last build "X hours ago" using the timer value. We can work around this by putting the timestamp of the last build in a gauge value; this monotonically increases and is easy to turn into a relative time. [1] https://github.com/grafana/grafana/issues/10550 Change-Id: Ia9518b6faecb30d45e0509bda4a9b2ab7fdc6261 --- doc/source/operation.rst | 32 ++++++++++++------- nodepool/builder.py | 13 ++++---- nodepool/tests/unit/test_builder.py | 12 ++++--- .../dib-stats-update-c4efeab13753e090.yaml | 12 +++++++ 4 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 releasenotes/notes/dib-stats-update-c4efeab13753e090.yaml diff --git a/doc/source/operation.rst b/doc/source/operation.rst index 6e2b51eea..06ff2925c 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -286,7 +286,27 @@ Nodepool builder .. zuul:stat:: nodepool.dib_image_build...size :type: gauge - This stat reports the size of the built image in bytes. + This stat reports the size of the built image in bytes. ``ext`` is + based on the formats of the images created for the build, for + example ``qcow2``, ``raw``, ``vhd``, etc. + +.. zuul:stat:: nodepool.dib_image_build..status.rc + :type: gauge + + Return code of the last DIB run. 
Zero is successful, non-zero is + unsuccessful. + +.. zuul:stat:: nodepool.dib_image_build..status.duration + :type: timer + + Time the last DIB run for this image build took, in ms + +.. zuul:stat:: nodepool.dib_image_build..status.last_build + :type: gauge + + The UNIX timestamp of the last time a build for this image + returned. This can be useful for presenting a relative time ("X + hours ago") in a dashboard. .. zuul:stat:: nodepool.image_update.. :type: counter, timer @@ -294,16 +314,6 @@ Nodepool builder Number of image uploads to a specific provider in the cloud plus the time in seconds spent to upload the image. -.. zuul:stat:: nodepool.dib_image_build...rc - :type: gauge - - Return code of the DIB. - -.. zuul:stat:: nodepool.dib_image_build...duration - :type: timer - - Time the DIB run took in ms - Nodepool launcher ~~~~~~~~~~~~~~~~~ diff --git a/nodepool/builder.py b/nodepool/builder.py index 2f6f4de20..b9a9d439d 100755 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -888,12 +888,13 @@ class BuildWorker(BaseWorker): if self._statsd: # report result to statsd - for ext in img_types.split(','): - key_base = 'nodepool.dib_image_build.%s.%s' % ( - diskimage.name, ext) - pipeline.gauge(key_base + '.rc', rc) - pipeline.timing(key_base + '.duration', - int(build_time * 1000)) + key_base = 'nodepool.dib_image_build.%s.status' % ( + diskimage.name) + pipeline.timing(key_base + '.duration', + int(build_time * 1000)) + pipeline.gauge(key_base + '.rc', rc) + pipeline.gauge(key_base + '.last_build', + int(time.time())) pipeline.send() return build_data diff --git a/nodepool/tests/unit/test_builder.py b/nodepool/tests/unit/test_builder.py index 9a7dad4da..8e358c8e4 100644 --- a/nodepool/tests/unit/test_builder.py +++ b/nodepool/tests/unit/test_builder.py @@ -308,10 +308,11 @@ class TestNodePoolBuilder(tests.DBTestCase): self.waitForImage('fake-provider', 'fake-image') # Make sure our cleanup worker properly removes the first build. 
self.waitForBuildDeletion('fake-image', '0000000001') - self.assertReportedStat('nodepool.dib_image_build.fake-image.qcow2.rc', + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image.status.rc', '127', 'g') self.assertReportedStat('nodepool.dib_image_build.' - 'fake-image.qcow2.duration', None, 'ms') + 'fake-image.status.duration', None, 'ms') def test_diskimage_build_only(self): configfile = self.setup_config('node_diskimage_only.yaml') @@ -322,12 +323,15 @@ class TestNodePoolBuilder(tests.DBTestCase): self.assertEqual(build_tar._formats, ['tar']) self.assertEqual(build_default._formats, ['qcow2']) - self.assertReportedStat('nodepool.dib_image_build.fake-image.tar.rc', + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image.status.rc', '0', 'g') self.assertReportedStat('nodepool.dib_image_build.' - 'fake-image.tar.duration', None, 'ms') + 'fake-image.status.duration', None, 'ms') self.assertReportedStat('nodepool.dib_image_build.' 'fake-image.tar.size', '4096', 'g') + self.assertReportedStat('nodepool.dib_image_build.' + 'fake-image.status.last_build', None, 'g') def test_diskimage_build_formats(self): configfile = self.setup_config('node_diskimage_formats.yaml') diff --git a/releasenotes/notes/dib-stats-update-c4efeab13753e090.yaml b/releasenotes/notes/dib-stats-update-c4efeab13753e090.yaml new file mode 100644 index 000000000..d3d598d24 --- /dev/null +++ b/releasenotes/notes/dib-stats-update-c4efeab13753e090.yaml @@ -0,0 +1,12 @@ +--- +upgrade: + - The diskimage-builder stats have been reworked to be more useful. + The return code and duration are now stored in + ``nodepool.dib_image_build..status.``; + previously this was split for each image format. This is + unnecessary and confusing since the results will always be the + same, since all formats are generated from the same + diskimage-builder run. 
An additional gauge + ``nodepool.dib_image_build..status.last_build`` is + added to make it easy to show relative time of builds in + dashboards.