Merge "Allow retrying PXE boot if it takes too long"

Zuul 2019-09-23 12:58:26 +00:00 committed by Gerrit Code Review
commit 0450c4f2fd
9 changed files with 279 additions and 56 deletions

View File

@@ -88,9 +88,19 @@ IRONIC_USE_MOD_WSGI=$(trueorfalse $ENABLE_HTTPD_MOD_WSGI_SERVICES IRONIC_USE_MOD
# Defaults to the (now confusingly named) IRONIC_USE_MOD_WSGI for backward compat
IRONIC_USE_WSGI=$(trueorfalse $IRONIC_USE_MOD_WSGI IRONIC_USE_WSGI)
# Whether DevStack will be setup for bare metal or VMs
IRONIC_IS_HARDWARE=$(trueorfalse False IRONIC_IS_HARDWARE)
# Deploy callback timeout can be changed from its default (1800), if required.
IRONIC_CALLBACK_TIMEOUT=${IRONIC_CALLBACK_TIMEOUT:-}
# Timeout before retrying PXE boot. Set low to help the CI.
if [[ "$IRONIC_IS_HARDWARE" == False ]]; then
IRONIC_PXE_BOOT_RETRY_TIMEOUT=${IRONIC_PXE_BOOT_RETRY_TIMEOUT:-600}
else
IRONIC_PXE_BOOT_RETRY_TIMEOUT=${IRONIC_PXE_BOOT_RETRY_TIMEOUT:-}
fi
# Ping timeout after the node becomes active
IRONIC_PING_TIMEOUT=${IRONIC_PING_TIMEOUT:-}
@@ -327,9 +337,6 @@ IRONIC_HTTP_PORT=${IRONIC_HTTP_PORT:-3928}
IRONIC_RPC_TRANSPORT=${IRONIC_RPC_TRANSPORT:-oslo}
IRONIC_JSON_RPC_PORT=${IRONIC_JSON_RPC_PORT:-8089}
# Whether DevStack will be setup for bare metal or VMs
IRONIC_IS_HARDWARE=$(trueorfalse False IRONIC_IS_HARDWARE)
# The first port in the range to bind the Virtual BMCs. The number of
# ports that will be used depends on $IRONIC_VM_COUNT variable, e.g if
# $IRONIC_VM_COUNT=3 the ports 6230, 6231 and 6232 will be used for the
@@ -1479,6 +1486,9 @@ function configure_ironic_conductor {
    iniset $IRONIC_CONF_FILE pxe tftp_server $IRONIC_TFTPSERVER_IP
    iniset $IRONIC_CONF_FILE pxe tftp_root $IRONIC_TFTPBOOT_DIR
    iniset $IRONIC_CONF_FILE pxe tftp_master_path $IRONIC_TFTPBOOT_DIR/master_images
    if [[ -n "$IRONIC_PXE_BOOT_RETRY_TIMEOUT" ]]; then
        iniset $IRONIC_CONF_FILE pxe boot_retry_timeout $IRONIC_PXE_BOOT_RETRY_TIMEOUT
    fi
    if [[ "$IRONIC_TERMINAL_SSL" == "True" ]]; then
        # Make sure the cert directory exists
        sudo mkdir -p $IRONIC_TERMINAL_CERT_DIR
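Taken together, the DevStack changes above default the retry timeout to 600 seconds on virtual (CI) setups and leave it unset, i.e. disabled, on real hardware. A minimal sketch of what this amounts to on the conductor side, using ironic's global oslo.config object; the values simply mirror the wiring above, nothing here is new API:

from ironic.conf import CONF

# with IRONIC_PXE_BOOT_RETRY_TIMEOUT=600 written into ironic.conf by iniset:
assert CONF.pxe.boot_retry_timeout == 600
# DevStack does not touch the check interval, so the option default applies:
assert CONF.pxe.boot_retry_check_interval == 90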

View File

@@ -272,6 +272,10 @@ def node_power_action(task, new_state, timeout=None):
    if node['target_power_state'] != target_state:
        node['target_power_state'] = target_state
        node['last_error'] = None
        driver_internal_info = node.driver_internal_info
        driver_internal_info['last_power_state_change'] = str(
            timeutils.utcnow().isoformat())
        node.driver_internal_info = driver_internal_info
        node.save()
    # take power action
@@ -872,6 +876,22 @@ def fast_track_able(task):
            and task.node.last_error is None)


def value_within_timeout(value, timeout):
    """Checks if the time is within the previous timeout seconds from now.

    :param value: a string representing date and time or None.
    :param timeout: timeout in seconds.
    """
    # use native datetime objects for conversion and compare
    # slightly odd because py2 compatibility :(
    last = datetime.datetime.strptime(value or '1970-01-01T00:00:00.000000',
                                      "%Y-%m-%dT%H:%M:%S.%f")
    # If we found nothing, we assume that the time is essentially epoch.
    time_delta = datetime.timedelta(seconds=timeout)
    last_valid = timeutils.utcnow() - time_delta
    return last_valid <= last
def is_fast_track(task):
    """Checks a fast track is available.
@@ -888,19 +908,8 @@ def is_fast_track(task):
    :returns: True if the last heartbeat that was recorded was within
        the [deploy]fast_track_timeout setting.
    """
    if not fast_track_able(task):
        return False
    # use native datetime objects for conversion and compare
    # slightly odd because py2 compatibility :(
    last = datetime.datetime.strptime(
        task.node.driver_internal_info.get(
            'agent_last_heartbeat',
            '1970-01-01T00:00:00.000000'),
        "%Y-%m-%dT%H:%M:%S.%f")
    # If we found nothing, we assume that the time is essentially epoch.
    time_delta = datetime.timedelta(seconds=CONF.deploy.fast_track_timeout)
    last_valid = timeutils.utcnow() - time_delta
    # Checking the power state, because if we find the machine off due to
    # any action, we can't actually fast track the node. :(
    return (last_valid <= last
    return (fast_track_able(task)
            and value_within_timeout(
                task.node.driver_internal_info.get('agent_last_heartbeat'),
                CONF.deploy.fast_track_timeout)
            and task.driver.power.get_power_state(task) == states.POWER_ON)
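A quick usage sketch of the new helper (timestamps invented): the value is expected in the str(isoformat()) form that node_power_action records above, matching the "%Y-%m-%dT%H:%M:%S.%f" format.

import datetime

from oslo_utils import timeutils

from ironic.conductor import utils as manager_utils

# a heartbeat recorded 30 seconds ago falls within a 300-second window
# (microsecond precision, as the parser expects)
recent = str((timeutils.utcnow()
              - datetime.timedelta(seconds=30)).isoformat())
assert manager_utils.value_within_timeout(recent, 300)
# None is treated as epoch, i.e. never within the timeout
assert not manager_utils.value_within_timeout(None, 300)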

View File

@@ -126,6 +126,16 @@ opts = [
               default=0,
               help=_('Timeout value (in seconds) for downloading an image '
                      'via iPXE. Defaults to 0 (no timeout)')),
    cfg.IntOpt('boot_retry_timeout',
               min=60,
               help=_('Timeout (in seconds) after which PXE boot should be '
                      'retried. Must be less than [conductor]'
                      'deploy_callback_timeout. Disabled by default.')),
    cfg.IntOpt('boot_retry_check_interval',
               default=90, min=1,
               help=_('Interval (in seconds) between periodic checks on PXE '
                      'boot retry. Has no effect if boot_retry_timeout '
                      'is not set.')),
    cfg.StrOpt('ip_version',
               default='4',
               choices=[('4', _('IPv4')),
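For illustration, the same options can also be set programmatically with oslo.config, e.g. from a test; the values here are arbitrary, and ironic.conf is ironic's own global config module:

from ironic.conf import CONF

# retry PXE boot after 10 minutes, checking every 60 seconds
CONF.set_override('boot_retry_timeout', 600, group='pxe')
CONF.set_override('boot_retry_check_interval', 60, group='pxe')
# oslo.config rejects values below the declared minimum,
# so e.g. boot_retry_timeout=30 would fail against min=60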

View File

@@ -279,20 +279,28 @@ def _zip_matching(a, b, key):
class Connection(api.Connection):
    """SqlAlchemy connection."""

    _NODE_QUERY_FIELDS = {'console_enabled', 'maintenance', 'driver',
                          'resource_class', 'provision_state', 'uuid', 'id',
                          'fault', 'conductor_group', 'owner'}
    _NODE_IN_QUERY_FIELDS = {'%s_in' % field: field
                             for field in ('uuid', 'provision_state')}
    _NODE_NON_NULL_FILTERS = {'associated': 'instance_uuid',
                              'reserved': 'reservation',
                              'with_power_state': 'power_state'}
    _NODE_FILTERS = ({'chassis_uuid', 'reserved_by_any_of',
                      'provisioned_before', 'inspection_started_before',
                      'description_contains'}
                     | _NODE_QUERY_FIELDS
                     | set(_NODE_IN_QUERY_FIELDS)
                     | set(_NODE_NON_NULL_FILTERS))

    def __init__(self):
        pass

    def _validate_nodes_filters(self, filters):
        if filters is None:
            filters = dict()
        supported_filters = {'console_enabled', 'maintenance', 'driver',
                             'resource_class', 'provision_state', 'uuid', 'id',
                             'chassis_uuid', 'associated', 'reserved',
                             'reserved_by_any_of', 'provisioned_before',
                             'inspection_started_before', 'fault',
                             'conductor_group', 'owner', 'uuid_in',
                             'with_power_state', 'description_contains'}
        unsupported_filters = set(filters).difference(supported_filters)
        unsupported_filters = set(filters).difference(self._NODE_FILTERS)
        if unsupported_filters:
            msg = _("SqlAlchemy API does not support "
                    "filtering by %s") % ', '.join(unsupported_filters)
@@ -301,26 +309,26 @@ class Connection(api.Connection):
    def _add_nodes_filters(self, query, filters):
        filters = self._validate_nodes_filters(filters)
        for field in ['console_enabled', 'maintenance', 'driver',
                      'resource_class', 'provision_state', 'uuid', 'id',
                      'fault', 'conductor_group', 'owner']:
        for field in self._NODE_QUERY_FIELDS:
            if field in filters:
                query = query.filter_by(**{field: filters[field]})
        for key, field in self._NODE_IN_QUERY_FIELDS.items():
            if key in filters:
                query = query.filter(
                    getattr(models.Node, field).in_(filters[key]))
        for key, field in self._NODE_NON_NULL_FILTERS.items():
            if key in filters:
                column = getattr(models.Node, field)
                if filters[key]:
                    query = query.filter(column != sql.null())
                else:
                    query = query.filter(column == sql.null())
        if 'chassis_uuid' in filters:
            # get_chassis_by_uuid() raises an exception if the chassis
            # is not found
            chassis_obj = self.get_chassis_by_uuid(filters['chassis_uuid'])
            query = query.filter_by(chassis_id=chassis_obj.id)
        if 'associated' in filters:
            if filters['associated']:
                query = query.filter(models.Node.instance_uuid != sql.null())
            else:
                query = query.filter(models.Node.instance_uuid == sql.null())
        if 'reserved' in filters:
            if filters['reserved']:
                query = query.filter(models.Node.reservation != sql.null())
            else:
                query = query.filter(models.Node.reservation == sql.null())
        if 'reserved_by_any_of' in filters:
            query = query.filter(models.Node.reservation.in_(
                filters['reserved_by_any_of']))
@@ -334,13 +342,6 @@ class Connection(api.Connection):
                     - (datetime.timedelta(
                         seconds=filters['inspection_started_before'])))
            query = query.filter(models.Node.inspection_started_at < limit)
        if 'uuid_in' in filters:
            query = query.filter(models.Node.uuid.in_(filters['uuid_in']))
        if 'with_power_state' in filters:
            if filters['with_power_state']:
                query = query.filter(models.Node.power_state != sql.null())
            else:
                query = query.filter(models.Node.power_state == sql.null())
        if 'description_contains' in filters:
            keyword = filters['description_contains']
            if keyword is not None:
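The refactoring keeps the external behavior: callers still pass plain filter dicts, and any key outside _NODE_FILTERS is rejected with the "does not support filtering by" error above. A hedged sketch of a caller exercising the new *_in handling (dbapi bootstrapping elided; get_nodeinfo_list is the same call the tests below use):

from ironic.common import states
from ironic.db import api as db_api

dbapi = db_api.get_instance()
# 'provision_state_in' is matched by _NODE_IN_QUERY_FIELDS and becomes
# an SQL IN clause on Node.provision_state
nodes = dbapi.get_nodeinfo_list(
    filters={'provision_state_in': [states.DEPLOYWAIT, states.CLEANWAIT],
             'reserved': False,
             'maintenance': False})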

View File

@@ -17,7 +17,6 @@ PXE Boot Interface
from ironic_lib import metrics_utils
from oslo_log import log as logging
from oslo_utils import strutils
from ironic.common import boot_devices
from ironic.common import dhcp_factory
@@ -174,14 +173,7 @@ class PXEBoot(pxe_base.PXEBaseMixin, base.BootInterface):
                pxe_config_template,
                ipxe_enabled=CONF.pxe.ipxe_enabled)
            persistent = False
            value = node.driver_info.get('force_persistent_boot_device',
                                         'Default')
            if value in {'Always', 'Default', 'Never'}:
                if value == 'Always':
                    persistent = True
            else:
                persistent = strutils.bool_from_string(value, False)
            persistent = self._persistent_ramdisk_boot(node)
            manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                               persistent=persistent)
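The inline persistent-boot logic above moves into the _persistent_ramdisk_boot helper shown in the next file. Its behavior for the recognized driver_info values, sketched against a mocked node (the pxe_base module path is assumed from this change):

from unittest import mock

from ironic.drivers.modules import pxe_base

iface = pxe_base.PXEBaseMixin()
node = mock.Mock()

node.driver_info = {'force_persistent_boot_device': 'Always'}
assert iface._persistent_ramdisk_boot(node)        # 'Always' -> persistent

node.driver_info = {}  # unset falls back to 'Default'
assert not iface._persistent_ramdisk_boot(node)    # 'Default'/'Never' -> one-time

node.driver_info = {'force_persistent_boot_device': 'on'}
assert iface._persistent_ramdisk_boot(node)        # other values parsed as boolean strings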

View File

@@ -13,13 +13,24 @@
Base PXE Interface Methods
"""
from futurist import periodics
from ironic_lib import metrics_utils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import strutils
from ironic.common import boot_devices
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import pxe_utils
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import deploy_utils
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
METRICS = metrics_utils.get_metrics_logger(__name__)
@@ -99,3 +110,82 @@ class PXEBaseMixin(object):
        parameters
        """
        pxe_utils.parse_driver_info(task.node, mode='rescue')

    def _persistent_ramdisk_boot(self, node):
        """If the ramdisk should be configured as a persistent boot device."""
        value = node.driver_info.get('force_persistent_boot_device', 'Default')
        if value in {'Always', 'Default', 'Never'}:
            return value == 'Always'
        else:
            return strutils.bool_from_string(value, False)

    _RETRY_ALLOWED_STATES = {states.DEPLOYWAIT, states.CLEANWAIT,
                             states.RESCUEWAIT}

    @METRICS.timer('PXEBaseMixin._check_boot_timeouts')
    @periodics.periodic(spacing=CONF.pxe.boot_retry_check_interval,
                        enabled=bool(CONF.pxe.boot_retry_timeout))
    def _check_boot_timeouts(self, manager, context):
        """Periodically checks whether boot has timed out and retries it.

        :param manager: conductor manager.
        :param context: request context.
        """
        filters = {'provision_state_in': self._RETRY_ALLOWED_STATES,
                   'reserved': False,
                   'maintenance': False,
                   'provisioned_before': CONF.pxe.boot_retry_timeout}
        node_iter = manager.iter_nodes(filters=filters)
        for node_uuid, driver, conductor_group in node_iter:
            try:
                lock_purpose = 'checking PXE boot status'
                with task_manager.acquire(context, node_uuid,
                                          shared=True,
                                          purpose=lock_purpose) as task:
                    self._check_boot_status(task)
            except (exception.NodeLocked, exception.NodeNotFound):
                continue

    def _check_boot_status(self, task):
        if not isinstance(task.driver.boot, PXEBaseMixin):
            return

        if not _should_retry_boot(task.node):
            return

        task.upgrade_lock(purpose='retrying PXE boot')

        # Retry critical checks after acquiring the exclusive lock.
        if (task.node.maintenance or task.node.provision_state
                not in self._RETRY_ALLOWED_STATES
                or not _should_retry_boot(task.node)):
            return

        LOG.info('Booting the ramdisk on node %(node)s is taking more than '
                 '%(timeout)d seconds, retrying boot',
                 {'node': task.node.uuid,
                  'timeout': CONF.pxe.boot_retry_timeout})

        manager_utils.node_power_action(task, states.POWER_OFF)
        # NOTE(dtantsur): retry even the persistent boot setting in case it
        # did not work for some reason.
        persistent = self._persistent_ramdisk_boot(task.node)
        manager_utils.node_set_boot_device(task, boot_devices.PXE,
                                           persistent=persistent)
        manager_utils.node_power_action(task, states.POWER_ON)


def _should_retry_boot(node):
    # NOTE(dtantsur): this assumes IPA, do we need to make it generic?
    for field in ('agent_last_heartbeat', 'last_power_state_change'):
        if manager_utils.value_within_timeout(
                node.driver_internal_info.get(field),
                CONF.pxe.boot_retry_timeout):
            # Alive and heartbeating, probably busy with something long
            LOG.debug('Not retrying PXE boot for node %(node)s; its '
                      '%(event)s happened less than %(timeout)d seconds ago',
                      {'node': node.uuid, 'event': field,
                       'timeout': CONF.pxe.boot_retry_timeout})
            return False
    return True
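_check_boot_status uses a check-then-upgrade pattern: it tests cheaply under a shared lock, upgrades to an exclusive lock only when a retry looks needed, and then re-tests, since the node may have changed while waiting for the lock. What the _should_retry_boot decision amounts to, sketched with a mocked node (values invented, import path assumed from this change):

from unittest import mock

from oslo_utils import timeutils

from ironic.conf import CONF
from ironic.drivers.modules.pxe_base import _should_retry_boot

CONF.set_override('boot_retry_timeout', 300, group='pxe')

node = mock.Mock()
node.driver_internal_info = {
    'agent_last_heartbeat': str(timeutils.utcnow().isoformat())}
# a fresh heartbeat means the ramdisk is alive -> no retry
assert not _should_retry_boot(node)

node.driver_internal_info = {}
# no recent heartbeat and no recent power state change -> retry the boot
assert _should_retry_boot(node)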

View File

@@ -231,7 +231,8 @@ class DbNodeTestCase(base.DbTestCase):
        # node with provision_updated timeout
        node1 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
                                       provision_updated_at=past)
                                       provision_updated_at=past,
                                       provision_state=states.DEPLOYING)
        # node with None in provision_updated_at
        node2 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
                                       provision_state=states.DEPLOYWAIT)
@@ -247,6 +248,10 @@ class DbNodeTestCase(base.DbTestCase):
                      states.DEPLOYWAIT})
        self.assertEqual([node2.id], [r[0] for r in res])

        res = self.dbapi.get_nodeinfo_list(
            filters={'provision_state_in': [states.ACTIVE, states.DEPLOYING]})
        self.assertEqual([node1.id], [r[0] for r in res])

    @mock.patch.object(timeutils, 'utcnow', autospec=True)
    def test_get_nodeinfo_list_inspection(self, mock_utcnow):
        past = datetime.datetime(2000, 1, 1, 0, 0)

View File

@@ -21,6 +21,7 @@ import tempfile
import mock
from oslo_config import cfg
from oslo_serialization import jsonutils as json
from oslo_utils import timeutils
from oslo_utils import uuidutils
from ironic.common import boot_devices
@@ -1162,3 +1163,98 @@ class PXEValidateRescueTestCase(db_base.DbTestCase):
        self.assertRaisesRegex(exception.MissingParameterValue,
                               'Missing.*rescue_kernel',
                               task.driver.boot.validate_rescue, task)


@mock.patch.object(ipxe.iPXEBoot, '__init__', lambda self: None)
@mock.patch.object(pxe.PXEBoot, '__init__', lambda self: None)
@mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
class PXEBootRetryTestCase(db_base.DbTestCase):

    boot_interface = 'pxe'

    def setUp(self):
        super(PXEBootRetryTestCase, self).setUp()
        self.config(enabled_boot_interfaces=['pxe', 'ipxe', 'fake'])
        self.config(boot_retry_timeout=300, group='pxe')
        self.node = obj_utils.create_test_node(
            self.context,
            driver='fake-hardware',
            boot_interface=self.boot_interface,
            provision_state=states.DEPLOYWAIT)

    @mock.patch.object(pxe.PXEBoot, '_check_boot_status', autospec=True)
    def test_check_boot_timeouts(self, mock_check_status, mock_power,
                                 mock_boot_dev):
        def _side_effect(iface, task):
            self.assertEqual(self.node.uuid, task.node.uuid)

        mock_check_status.side_effect = _side_effect
        manager = mock.Mock(spec=['iter_nodes'])
        manager.iter_nodes.return_value = [
            (uuidutils.generate_uuid(), 'fake-hardware', ''),
            (self.node.uuid, self.node.driver, self.node.conductor_group)
        ]
        iface = pxe.PXEBoot()
        iface._check_boot_timeouts(manager, self.context)
        mock_check_status.assert_called_once_with(iface, mock.ANY)

    def test_check_boot_status_another_boot_interface(self, mock_power,
                                                      mock_boot_dev):
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=True) as task:
            task.driver.boot = fake.FakeBoot()
            pxe.PXEBoot()._check_boot_status(task)
            self.assertTrue(task.shared)
        self.assertFalse(mock_power.called)
        self.assertFalse(mock_boot_dev.called)

    def test_check_boot_status_recent_power_change(self, mock_power,
                                                   mock_boot_dev):
        for field in ('agent_last_heartbeat', 'last_power_state_change'):
            with task_manager.acquire(self.context, self.node.uuid,
                                      shared=True) as task:
                task.node.driver_internal_info = {
                    field: str(timeutils.utcnow().isoformat())
                }
                task.driver.boot._check_boot_status(task)
                self.assertTrue(task.shared)
        self.assertFalse(mock_power.called)
        self.assertFalse(mock_boot_dev.called)

    def test_check_boot_status_maintenance(self, mock_power, mock_boot_dev):
        self.node.maintenance = True
        self.node.save()
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=True) as task:
            task.driver.boot._check_boot_status(task)
            self.assertFalse(task.shared)
        self.assertFalse(mock_power.called)
        self.assertFalse(mock_boot_dev.called)

    def test_check_boot_status_wrong_state(self, mock_power, mock_boot_dev):
        self.node.provision_state = states.DEPLOYING
        self.node.save()
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=True) as task:
            task.driver.boot._check_boot_status(task)
            self.assertFalse(task.shared)
        self.assertFalse(mock_power.called)
        self.assertFalse(mock_boot_dev.called)

    def test_check_boot_status_retry(self, mock_power, mock_boot_dev):
        with task_manager.acquire(self.context, self.node.uuid,
                                  shared=True) as task:
            task.driver.boot._check_boot_status(task)
            self.assertFalse(task.shared)
            mock_power.assert_has_calls([
                mock.call(task, states.POWER_OFF),
                mock.call(task, states.POWER_ON)
            ])
            mock_boot_dev.assert_called_once_with(task, 'pxe',
                                                  persistent=False)


class iPXEBootRetryTestCase(PXEBootRetryTestCase):

    boot_interface = 'ipxe'

View File

@@ -0,0 +1,10 @@
---
features:
  - |
    Allows retrying PXE/iPXE boot during deployment, cleaning and rescuing.
    This feature is disabled by default and can be enabled by setting
    ``[pxe]boot_retry_timeout`` to the timeout (in seconds) after which
    the boot should be retried.

    The new option ``[pxe]boot_retry_check_interval`` defines how often to
    check the nodes for timeout and defaults to 90 seconds.