From 68ba345520926bd279d64ff721002a6eebd744f6 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Fri, 29 Mar 2019 09:38:56 -0700 Subject: [PATCH] Make it possible to send sensor data for all nodes Presently the data collection defaults to only permit sensor data to be collected and transmitted as notifications for instances deployed via nova, however standalone operators or general data center operators may find the sensor data useful to identify undeployed failing hardware and overall check the hardware health. Adds a boolean to control the filter being set for a deployed node. Change-Id: I345f6e3a9f47d8d09ea488d64927fd0c5fb7dfc7 --- ironic/conductor/manager.py | 7 +++++-- ironic/conf/conductor.py | 10 ++++++++++ ironic/tests/unit/conductor/test_manager.py | 4 ++++ ...r-data-for-all-nodes-a732d9df43e74318.yaml | 19 +++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 8a453c51b6..a3c9697a03 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -2888,9 +2888,12 @@ class ConductorManager(base_manager.BaseConductorManager): @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval, enabled=CONF.conductor.send_sensor_data) def _send_sensor_data(self, context): - """Periodically sends sensor data to Ceilometer.""" + """Periodically collects and transmits sensor data notifications.""" + + filters = {} + if not CONF.conductor.send_sensor_data_for_undeployed_nodes: + filters['provision_state'] = states.ACTIVE - filters = {'associated': True} nodes = queue.Queue() for node_info in self.iter_nodes(fields=['instance_uuid'], filters=filters): diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index ae7ed983cb..adecee58d6 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -125,6 +125,16 @@ opts = [ help=_('List of comma 
separated meter types which need to be' ' sent to Ceilometer. The default value, "ALL", is a ' 'special value meaning send all the sensor data.')), + cfg.BoolOpt('send_sensor_data_for_undeployed_nodes', + default=False, + help=_('The default for sensor data collection is to only ' + 'collect data for machines that are deployed, however ' + 'operators may desire to know if there are failures ' + 'in hardware that is not presently in use. ' + 'When set to true, the conductor will collect sensor ' + 'information from all nodes when sensor data ' + 'collection is enabled via the send_sensor_data ' + 'setting.')), cfg.IntOpt('sync_local_state_interval', default=180, help=_('When conductors join or leave the cluster, existing ' diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 39c2a3db32..fd9148bf5c 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -5701,6 +5701,10 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertEqual(number_of_workers, mock_spawn.call_count) + # TODO(TheJulia): At some point, we should add a test to validate + # that a modified filter to return all nodes actually works, although + # the way the sensor tests are written, the list is all mocked. + @mgr_utils.mock_record_keepalive class BootDeviceTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): diff --git a/releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml b/releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml new file mode 100644 index 0000000000..a55c43b53d --- /dev/null +++ b/releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml @@ -0,0 +1,19 @@ +--- +features: + - | + Adds a ``[conductor]send_sensor_data_for_undeployed_nodes`` option to + enable ironic to collect and transmit sensor data for all nodes + for which sensor data collection is available. 
By default, this option + is not enabled, which aligns with the prior behavior of sensor data + collection and transmission where such data was only collected + if an ``instance_uuid`` was present to signify that the node has been or + is being deployed. With this option set to ``True``, operators may be able + to identify hardware in a faulty state through the sensor data and take + action before an instance workload is deployed. +fixes: + - | + Fixes an issue where nodes in the process of deployment may have metrics + data collected and transmitted during the deployment process which + may erroneously generate alarms depending on the operator's monitoring + configuration. This was due to a database filter relying upon the + indicator of an ``instance_uuid`` as opposed to the state of a node.