diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index ba4c971069..d6371545ec 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -2898,9 +2898,12 @@ class ConductorManager(base_manager.BaseConductorManager): @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval, enabled=CONF.conductor.send_sensor_data) def _send_sensor_data(self, context): - """Periodically sends sensor data to Ceilometer.""" + """Periodically collects and transmits sensor data notifications.""" + + filters = {} + if not CONF.conductor.send_sensor_data_for_undeployed_nodes: + filters['provision_state'] = states.ACTIVE - filters = {'associated': True} nodes = queue.Queue() for node_info in self.iter_nodes(fields=['instance_uuid'], filters=filters): diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index ae7ed983cb..adecee58d6 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -125,6 +125,16 @@ opts = [ help=_('List of comma separated meter types which need to be' ' sent to Ceilometer. The default value, "ALL", is a ' 'special value meaning send all the sensor data.')), + cfg.BoolOpt('send_sensor_data_for_undeployed_nodes', + default=False, + help=_('The default for sensor data collection is to only ' + 'collect data for machines that are deployed, however ' + 'operators may desire to know if there are failures ' + 'in hardware that is not presently in use. 
' + 'When set to true, the conductor will collect sensor ' + 'information from all nodes when sensor data ' + 'collection is enabled via the send_sensor_data ' + 'setting.')), cfg.IntOpt('sync_local_state_interval', default=180, help=_('When conductors join or leave the cluster, existing ' diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index de282fb328..a38efd1939 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -5720,6 +5720,10 @@ class SensorsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertEqual(number_of_workers, mock_spawn.call_count) + # TODO(TheJulia): At some point, we should add a test to validate + # that a modified filter to return all nodes actually works, although + # the way the sensor tests are written, the list is all mocked. + @mgr_utils.mock_record_keepalive class BootDeviceTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): diff --git a/releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml b/releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml new file mode 100644 index 0000000000..a55c43b53d --- /dev/null +++ b/releasenotes/notes/send-sensor-data-for-all-nodes-a732d9df43e74318.yaml @@ -0,0 +1,19 @@ +--- +features: + - | + Adds a ``[conductor]send_sensor_data_for_undeployed_nodes`` option to + enable ironic to collect and transmit sensor data for all nodes + for which sensor data collection is available. By default, this option + is not enabled which aligns with the prior behavior of sensor data + collection and transmission where such data was only collected + if an ``instance_uuid`` was present to signify that the node has been or + is being deployed. With this option set to ``True``, operators may be able + to identify hardware in a faulty state through the sensor data and take + action before an instance workload is deployed. 
+fixes: + - | + Fixes an issue where nodes in the process of deployment may have metrics + data collected and transmitted during the deployment process which + may erroneously generate alarms depending on the operator's monitoring + configuration. This was due to a database filter relying upon the + indicator of an ``instance_uuid`` as opposed to the state of a node.