Insert artificial delay between sending virtual nodes on introspection

KVM PXE code seems broken in an interesting way, when you try to PXE
boot too many nodes. This change makes inspector sleep configurable
amount of time between powering on nodes with *_ssh driver.

Work around in devstack/exercise.sh is no longer needed and is dropped.

Note that this change is not HA, so we might revisit it in the future.

Change-Id: I9b16592f9b5130e90c02fce1b421887f451e397b
Closes-Bug: #1473024
This commit is contained in:
Dmitry Tantsur 2015-07-17 15:55:34 +02:00
parent d3bd8f41d7
commit f15aee4a7f
5 changed files with 110 additions and 6 deletions

View File

@ -36,10 +36,6 @@ done
for uuid in $nodes; do
ironic node-set-provision-state $uuid inspect
# FIXME(dtantsur): virtual machines PXE often behaves weirdly when a lot of
# machines DHCP at the same time, inserting sleep helps. It does not affect
# bare metal environment AFAIK.
sleep 5
done
current_nodes=$nodes

View File

@ -56,6 +56,13 @@
# The green thread pool size. (integer value)
#max_concurrency = 1000
# Delay (in seconds) between two introspections. (integer value)
#introspection_delay = 5
# Only node with drivers matching this regular expression will be
# affected by introspection_delay setting. (string value)
#introspection_delay_drivers = ^.*_ssh$
[database]

View File

@ -210,7 +210,14 @@ SERVICE_OPTS = [
help='Path to SSL key'),
cfg.IntOpt('max_concurrency',
default=1000,
help='The green thread pool size.')
help='The green thread pool size.'),
cfg.IntOpt('introspection_delay',
default=5,
help='Delay (in seconds) between two introspections.'),
cfg.StrOpt('introspection_delay_drivers',
default='^.*_ssh$',
help='Only node with drivers matching this regular expression '
'will be affected by introspection_delay setting.'),
]

View File

@ -14,8 +14,11 @@
"""Handling introspection request."""
import logging
import re
import string
import time
from eventlet import semaphore
from ironicclient import exceptions
from oslo_config import cfg
@ -31,6 +34,10 @@ LOG = logging.getLogger("ironic_inspector.introspect")
PASSWORD_ACCEPTED_CHARS = set(string.ascii_letters + string.digits)
PASSWORD_MAX_LENGTH = 20 # IPMI v2.0
_LAST_INTROSPECTION_TIME = 0
_LAST_INTROSPECTION_LOCK = semaphore.BoundedSemaphore()
_LAST_INTROSPECTION_RE = re.compile(CONF.introspection_delay_drivers)
def _validate_ipmi_credentials(node, new_ipmi_credentials):
if not CONF.processing.enable_setting_ipmi_credentials:
@ -112,6 +119,8 @@ def introspect(uuid, new_ipmi_credentials=None):
def _background_introspect(ironic, node_info):
global _LAST_INTROSPECTION_TIME
# TODO(dtantsur): pagination
macs = list(node_info.ports(ironic))
if macs:
@ -130,6 +139,17 @@ def _background_introspect(ironic, node_info):
' node %(node)s: %(exc)s') %
{'node': node_info.uuid, 'exc': exc})
if _LAST_INTROSPECTION_RE.match(node_info.node().driver):
LOG.debug('Attempting to acquire lock on last introspection time')
with _LAST_INTROSPECTION_LOCK:
delay = (_LAST_INTROSPECTION_TIME - time.time()
+ CONF.introspection_delay)
if delay > 0:
LOG.debug('Waiting %d seconds before sending the next '
'node on introspection', delay)
time.sleep(delay)
_LAST_INTROSPECTION_TIME = time.time()
try:
utils.retry_on_conflict(ironic.node.set_power_state,
node_info.uuid, 'reboot')

View File

@ -12,6 +12,7 @@
# limitations under the License.
import collections
import time
import eventlet
from ironicclient import exceptions
@ -30,6 +31,7 @@ CONF = cfg.CONF
class BaseTest(test_base.NodeTest):
def setUp(self):
super(BaseTest, self).setUp()
introspect._LAST_INTROSPECTION_TIME = 0
self.node.power_state = 'power off'
self.node_compat = mock.Mock(driver='pxe_ssh',
uuid='uuid_compat',
@ -43,6 +45,7 @@ class BaseTest(test_base.NodeTest):
for p in self.ports)
self.node_info = mock.Mock(uuid=self.uuid, options={})
self.node_info.ports.return_value = self.ports_dict
self.node_info.node.return_value = self.node
def _prepare(self, client_mock):
cli = client_mock.return_value
@ -157,7 +160,8 @@ class TestIntrospect(BaseTest):
cli.node.get.return_value = self.node_compat
cli.node.validate.return_value = mock.Mock(power={'result': True})
add_mock.return_value = mock.Mock(uuid=self.node_compat.uuid,
options={})
options={},
**{'node.return_value': self.node})
add_mock.return_value.ports.return_value = collections.OrderedDict(
(p.address, p) for p in self.ports)
@ -247,6 +251,76 @@ class TestIntrospect(BaseTest):
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertFalse(add_mock.called)
@mock.patch.object(time, 'sleep')
@mock.patch.object(time, 'time')
def test_sleep_no_pxe_ssh(self, time_mock, sleep_mock, client_mock,
add_mock, filters_mock):
self.node.driver = 'pxe_ipmitool'
time_mock.return_value = 42
introspect._LAST_INTROSPECTION_TIME = 40
CONF.set_override('introspection_delay', 10)
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
introspect.introspect(self.uuid)
self.assertFalse(sleep_mock.called)
cli.node.set_boot_device.assert_called_once_with(self.uuid,
'pxe',
persistent=False)
cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
# not changed
self.assertEqual(40, introspect._LAST_INTROSPECTION_TIME)
@mock.patch.object(time, 'sleep')
@mock.patch.object(time, 'time')
def test_sleep_with_pxe_ssh(self, time_mock, sleep_mock, client_mock,
add_mock, filters_mock):
self.node.driver = 'pxe_ssh'
time_mock.return_value = 42
introspect._LAST_INTROSPECTION_TIME = 40
CONF.set_override('introspection_delay', 10)
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
introspect.introspect(self.uuid)
sleep_mock.assert_called_once_with(8)
cli.node.set_boot_device.assert_called_once_with(self.uuid,
'pxe',
persistent=False)
cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
# updated to the current time.time()
self.assertEqual(42, introspect._LAST_INTROSPECTION_TIME)
@mock.patch.object(time, 'sleep')
@mock.patch.object(time, 'time')
def test_sleep_not_needed_with_pxe_ssh(self, time_mock, sleep_mock,
client_mock, add_mock,
filters_mock):
self.node.driver = 'agent_ssh'
time_mock.return_value = 100
introspect._LAST_INTROSPECTION_TIME = 40
CONF.set_override('introspection_delay', 10)
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
introspect.introspect(self.uuid)
self.assertFalse(sleep_mock.called)
cli.node.set_boot_device.assert_called_once_with(self.uuid,
'pxe',
persistent=False)
cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
# updated to the current time.time()
self.assertEqual(100, introspect._LAST_INTROSPECTION_TIME)
@mock.patch.object(utils, 'spawn_n',
lambda f, *a, **kw: f(*a, **kw) and None)