From f15aee4a7f8531eb88a78789e3765858998c409a Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Fri, 17 Jul 2015 15:55:34 +0200 Subject: [PATCH] Insert artificial delay between sending virtual nodes on introspection KVM PXE code seems broken in an interesting way, when you try to PXE boot too many nodes. This change makes inspector sleep configurable amount of time between powering on nodes with *_ssh driver. Work around in devstack/exercise.sh is no longer needed and is dropped. Note that this change is not HA, so we might revisit it in the future. Change-Id: I9b16592f9b5130e90c02fce1b421887f451e397b Closes-Bug: #1473024 --- devstack/exercise.sh | 4 -- example.conf | 7 +++ ironic_inspector/conf.py | 9 ++- ironic_inspector/introspect.py | 20 +++++++ ironic_inspector/test/test_introspect.py | 76 +++++++++++++++++++++++- 5 files changed, 110 insertions(+), 6 deletions(-) diff --git a/devstack/exercise.sh b/devstack/exercise.sh index b346483..92dd6d6 100755 --- a/devstack/exercise.sh +++ b/devstack/exercise.sh @@ -36,10 +36,6 @@ done for uuid in $nodes; do ironic node-set-provision-state $uuid inspect - # FIXME(dtantsur): virtual machines PXE often behaves weirdly when a lot of - # machines DHCP at the same time, inserting sleep helps. It does not affect - # bare metal environment AFAIK. - sleep 5 done current_nodes=$nodes diff --git a/example.conf b/example.conf index c629bdd..ec6d932 100644 --- a/example.conf +++ b/example.conf @@ -56,6 +56,13 @@ # The green thread pool size. (integer value) #max_concurrency = 1000 +# Delay (in seconds) between two introspections. (integer value) +#introspection_delay = 5 + +# Only node with drivers matching this regular expression will be +# affected by introspection_delay setting. (string value) +#introspection_delay_drivers = ^.*_ssh$ + [database] diff --git a/ironic_inspector/conf.py b/ironic_inspector/conf.py index 2d34591..859a0c8 100644 --- a/ironic_inspector/conf.py +++ b/ironic_inspector/conf.py @@ -210,7 +210,14 @@ SERVICE_OPTS = [ help='Path to SSL key'), cfg.IntOpt('max_concurrency', default=1000, - help='The green thread pool size.') + help='The green thread pool size.'), + cfg.IntOpt('introspection_delay', + default=5, + help='Delay (in seconds) between two introspections.'), + cfg.StrOpt('introspection_delay_drivers', + default='^.*_ssh$', + help='Only node with drivers matching this regular expression ' + 'will be affected by introspection_delay setting.'), ] diff --git a/ironic_inspector/introspect.py b/ironic_inspector/introspect.py index 80cb763..da2fe8f 100644 --- a/ironic_inspector/introspect.py +++ b/ironic_inspector/introspect.py @@ -14,8 +14,11 @@ """Handling introspection request.""" import logging +import re import string +import time +from eventlet import semaphore from ironicclient import exceptions from oslo_config import cfg @@ -31,6 +34,10 @@ LOG = logging.getLogger("ironic_inspector.introspect") PASSWORD_ACCEPTED_CHARS = set(string.ascii_letters + string.digits) PASSWORD_MAX_LENGTH = 20 # IPMI v2.0 +_LAST_INTROSPECTION_TIME = 0 +_LAST_INTROSPECTION_LOCK = semaphore.BoundedSemaphore() +_LAST_INTROSPECTION_RE = re.compile(CONF.introspection_delay_drivers) + def _validate_ipmi_credentials(node, new_ipmi_credentials): if not CONF.processing.enable_setting_ipmi_credentials: @@ -112,6 +119,8 @@ def introspect(uuid, new_ipmi_credentials=None): def _background_introspect(ironic, node_info): + global _LAST_INTROSPECTION_TIME + # TODO(dtantsur): pagination macs = list(node_info.ports(ironic)) if macs: @@ -130,6 +139,17 @@ def _background_introspect(ironic, node_info): ' node %(node)s: %(exc)s') % {'node': node_info.uuid, 'exc': exc}) + if _LAST_INTROSPECTION_RE.match(node_info.node().driver): + LOG.debug('Attempting to acquire lock on last introspection time') + with _LAST_INTROSPECTION_LOCK: + delay = (_LAST_INTROSPECTION_TIME - time.time() + + CONF.introspection_delay) + if delay > 0: + LOG.debug('Waiting %d seconds before sending the next ' + 'node on introspection', delay) + time.sleep(delay) + _LAST_INTROSPECTION_TIME = time.time() + try: utils.retry_on_conflict(ironic.node.set_power_state, node_info.uuid, 'reboot') diff --git a/ironic_inspector/test/test_introspect.py b/ironic_inspector/test/test_introspect.py index d83b4bc..6bcca01 100644 --- a/ironic_inspector/test/test_introspect.py +++ b/ironic_inspector/test/test_introspect.py @@ -12,6 +12,7 @@ # limitations under the License. import collections +import time import eventlet from ironicclient import exceptions @@ -30,6 +31,7 @@ CONF = cfg.CONF class BaseTest(test_base.NodeTest): def setUp(self): super(BaseTest, self).setUp() + introspect._LAST_INTROSPECTION_TIME = 0 self.node.power_state = 'power off' self.node_compat = mock.Mock(driver='pxe_ssh', uuid='uuid_compat', @@ -43,6 +45,7 @@ class BaseTest(test_base.NodeTest): for p in self.ports) self.node_info = mock.Mock(uuid=self.uuid, options={}) self.node_info.ports.return_value = self.ports_dict + self.node_info.node.return_value = self.node def _prepare(self, client_mock): cli = client_mock.return_value @@ -157,7 +160,8 @@ class TestIntrospect(BaseTest): cli.node.get.return_value = self.node_compat cli.node.validate.return_value = mock.Mock(power={'result': True}) add_mock.return_value = mock.Mock(uuid=self.node_compat.uuid, - options={}) + options={}, + **{'node.return_value': self.node}) add_mock.return_value.ports.return_value = collections.OrderedDict( (p.address, p) for p in self.ports) @@ -247,6 +251,76 @@ class TestIntrospect(BaseTest): self.assertEqual(0, cli.node.set_power_state.call_count) self.assertFalse(add_mock.called) + @mock.patch.object(time, 'sleep') + @mock.patch.object(time, 'time') + def test_sleep_no_pxe_ssh(self, time_mock, sleep_mock, client_mock, + add_mock, filters_mock): + self.node.driver = 'pxe_ipmitool' + time_mock.return_value = 42 + introspect._LAST_INTROSPECTION_TIME = 40 + CONF.set_override('introspection_delay', 10) + + cli = self._prepare(client_mock) + add_mock.return_value = self.node_info + + introspect.introspect(self.uuid) + + self.assertFalse(sleep_mock.called) + cli.node.set_boot_device.assert_called_once_with(self.uuid, + 'pxe', + persistent=False) + cli.node.set_power_state.assert_called_once_with(self.uuid, + 'reboot') + # not changed + self.assertEqual(40, introspect._LAST_INTROSPECTION_TIME) + + @mock.patch.object(time, 'sleep') + @mock.patch.object(time, 'time') + def test_sleep_with_pxe_ssh(self, time_mock, sleep_mock, client_mock, + add_mock, filters_mock): + self.node.driver = 'pxe_ssh' + time_mock.return_value = 42 + introspect._LAST_INTROSPECTION_TIME = 40 + CONF.set_override('introspection_delay', 10) + + cli = self._prepare(client_mock) + add_mock.return_value = self.node_info + + introspect.introspect(self.uuid) + + sleep_mock.assert_called_once_with(8) + cli.node.set_boot_device.assert_called_once_with(self.uuid, + 'pxe', + persistent=False) + cli.node.set_power_state.assert_called_once_with(self.uuid, + 'reboot') + # updated to the current time.time() + self.assertEqual(42, introspect._LAST_INTROSPECTION_TIME) + + @mock.patch.object(time, 'sleep') + @mock.patch.object(time, 'time') + def test_sleep_not_needed_with_pxe_ssh(self, time_mock, sleep_mock, + client_mock, add_mock, + filters_mock): + self.node.driver = 'agent_ssh' + time_mock.return_value = 100 + introspect._LAST_INTROSPECTION_TIME = 40 + CONF.set_override('introspection_delay', 10) + + cli = self._prepare(client_mock) + add_mock.return_value = self.node_info + + introspect.introspect(self.uuid) + + self.assertFalse(sleep_mock.called) + cli.node.set_boot_device.assert_called_once_with(self.uuid, + 'pxe', + persistent=False) + cli.node.set_power_state.assert_called_once_with(self.uuid, + 'reboot') + # updated to the current time.time() + self.assertEqual(100, introspect._LAST_INTROSPECTION_TIME) + @mock.patch.object(utils, 'spawn_n', lambda f, *a, **kw: f(*a, **kw) and None)