Gracefully ERROR in _init_instance if vnic_type changed
If the vnic_type of a bound port changes from "direct" to "macvtap" and then the compute service is restarted then during _init_instance nova tries to plug the vif of the changed port. However as it now has macvtap vnic_type nova tries to look up the netdev of the parent VF. Still that VF is consumed by the instance so there is no such netdev on the host OS. This error killed the compute service at startup due to unhandled exception. This patch adds the exception handler, logs an ERROR and continue initializing other instances on the host. Also this patch adds a detailed ERROR log when nova detects that the vnic_type changed during _heal_instance_info_cache periodic. Closes-Bug: #1981813 Change-Id: I1719f8eda04e8d15a3b01f0612977164c4e55e85
This commit is contained in:
parent
f8c91eb75f
commit
e43bf900dc
@ -1247,6 +1247,20 @@ class ComputeManager(manager.Manager):
|
||||
'updated.', instance=instance)
|
||||
self._set_instance_obj_error_state(instance)
|
||||
return
|
||||
except exception.PciDeviceNotFoundById:
|
||||
# This is bug 1981813 where the bound port vnic_type has changed
|
||||
# from direct to macvtap. Nova does not support that and it
|
||||
# already printed an ERROR when the change is detected during
|
||||
# _heal_instance_info_cache. Now we print an ERROR again and skip
|
||||
# plugging the vifs but let the service startup continue to init
|
||||
# the other instances
|
||||
LOG.exception(
|
||||
'Virtual interface plugging failed for instance. Probably the '
|
||||
'vnic_type of the bound port has been changed. Nova does not '
|
||||
'support such change.',
|
||||
instance=instance
|
||||
)
|
||||
return
|
||||
|
||||
if instance.task_state == task_states.RESIZE_MIGRATING:
|
||||
# We crashed during resize/migration, so roll back for safety
|
||||
|
@ -3356,6 +3356,25 @@ class API:
|
||||
delegate_create=True,
|
||||
)
|
||||
|
||||
def _log_error_if_vnic_type_changed(
|
||||
self, port_id, old_vnic_type, new_vnic_type, instance
|
||||
):
|
||||
if old_vnic_type and old_vnic_type != new_vnic_type:
|
||||
LOG.error(
|
||||
'The vnic_type of the bound port %s has '
|
||||
'been changed in neutron from "%s" to '
|
||||
'"%s". Changing vnic_type of a bound port '
|
||||
'is not supported by Nova. To avoid '
|
||||
'breaking the connectivity of the instance '
|
||||
'please change the port vnic_type back to '
|
||||
'"%s".',
|
||||
port_id,
|
||||
old_vnic_type,
|
||||
new_vnic_type,
|
||||
old_vnic_type,
|
||||
instance=instance
|
||||
)
|
||||
|
||||
def _build_network_info_model(self, context, instance, networks=None,
|
||||
port_ids=None, admin_client=None,
|
||||
preexisting_port_ids=None,
|
||||
@ -3429,6 +3448,12 @@ class API:
|
||||
preexisting_port_ids)
|
||||
for index, vif in enumerate(nw_info):
|
||||
if vif['id'] == refresh_vif_id:
|
||||
self._log_error_if_vnic_type_changed(
|
||||
vif['id'],
|
||||
vif['vnic_type'],
|
||||
refreshed_vif['vnic_type'],
|
||||
instance,
|
||||
)
|
||||
# Update the existing entry.
|
||||
nw_info[index] = refreshed_vif
|
||||
LOG.debug('Updated VIF entry in instance network '
|
||||
@ -3478,6 +3503,7 @@ class API:
|
||||
networks, port_ids = self._gather_port_ids_and_networks(
|
||||
context, instance, networks, port_ids, client)
|
||||
|
||||
old_nw_info = instance.get_network_info()
|
||||
nw_info = network_model.NetworkInfo()
|
||||
for port_id in port_ids:
|
||||
current_neutron_port = current_neutron_port_map.get(port_id)
|
||||
@ -3485,6 +3511,14 @@ class API:
|
||||
vif = self._build_vif_model(
|
||||
context, client, current_neutron_port, networks,
|
||||
preexisting_port_ids)
|
||||
for old_vif in old_nw_info:
|
||||
if old_vif['id'] == port_id:
|
||||
self._log_error_if_vnic_type_changed(
|
||||
port_id,
|
||||
old_vif['vnic_type'],
|
||||
vif['vnic_type'],
|
||||
instance,
|
||||
)
|
||||
nw_info.append(vif)
|
||||
elif nw_info_refresh:
|
||||
LOG.info('Port %s from network info_cache is no '
|
||||
|
@ -1077,6 +1077,14 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
|
||||
self.host_mappings['compute1'].cell_mapping
|
||||
) as cctxt:
|
||||
compute.manager._heal_instance_info_cache(cctxt)
|
||||
self.assertIn(
|
||||
'The vnic_type of the bound port %s has been changed in '
|
||||
'neutron from "direct" to "macvtap". Changing vnic_type of a '
|
||||
'bound port is not supported by Nova. To avoid breaking the '
|
||||
'connectivity of the instance please change the port '
|
||||
'vnic_type back to "direct".' % port['id'],
|
||||
self.stdlog.logger.output,
|
||||
)
|
||||
|
||||
def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False):
|
||||
# we want to fail the netdev lookup only if the pci_address is
|
||||
@ -1103,15 +1111,16 @@ class SRIOVServersTest(_PCIServersWithMigrationTestBase):
|
||||
self.libvirt.mock_get_ifname_by_pci_address.side_effect = (
|
||||
fake_get_ifname_by_pci_address
|
||||
)
|
||||
# This is bug 1981813 as the compute service fails to start with an
|
||||
# exception.
|
||||
# Nova cannot prevent the vnic_type change on a bound port. Neutron
|
||||
# should prevent that instead. But the nova-compute should still
|
||||
# be able to start up and only log an ERROR for this instance in
|
||||
# inconsistent state.
|
||||
self.assertRaises(
|
||||
exception.PciDeviceNotFoundById,
|
||||
self.restart_compute_service, 'compute1'
|
||||
self.restart_compute_service('compute1')
|
||||
self.assertIn(
|
||||
'Virtual interface plugging failed for instance. Probably the '
|
||||
'vnic_type of the bound port has been changed. Nova does not '
|
||||
'support such change.',
|
||||
self.stdlog.logger.output,
|
||||
)
|
||||
|
||||
|
||||
|
@ -1350,6 +1350,36 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
self.compute._init_instance(self.context, instance)
|
||||
set_error_state.assert_called_once_with(instance)
|
||||
|
||||
def test_init_instance_vif_plug_fails_missing_pci(self):
|
||||
instance = fake_instance.fake_instance_obj(
|
||||
self.context,
|
||||
uuid=uuids.instance,
|
||||
info_cache=None,
|
||||
power_state=power_state.RUNNING,
|
||||
vm_state=vm_states.ACTIVE,
|
||||
task_state=None,
|
||||
host=self.compute.host,
|
||||
expected_attrs=['info_cache'])
|
||||
|
||||
with test.nested(
|
||||
mock.patch.object(context, 'get_admin_context',
|
||||
return_value=self.context),
|
||||
mock.patch.object(objects.Instance, 'get_network_info',
|
||||
return_value=network_model.NetworkInfo()),
|
||||
mock.patch.object(self.compute.driver, 'plug_vifs',
|
||||
side_effect=exception.PciDeviceNotFoundById("pci-addr")),
|
||||
mock.patch("nova.compute.manager.LOG.exception"),
|
||||
) as (get_admin_context, get_nw_info, plug_vifs, log_exception):
|
||||
# as this does not raise, we are sure that the compute service
|
||||
# continues initializing the rest of the instances
|
||||
self.compute._init_instance(self.context, instance)
|
||||
log_exception.assert_called_once_with(
|
||||
"Virtual interface plugging failed for instance. Probably the "
|
||||
"vnic_type of the bound port has been changed. Nova does not "
|
||||
"support such change.",
|
||||
instance=instance
|
||||
)
|
||||
|
||||
def _test__validate_pinning_configuration(self, supports_pcpus=True):
|
||||
instance_1 = fake_instance.fake_instance_obj(
|
||||
self.context, uuid=uuids.instance_1)
|
||||
|
@ -3382,6 +3382,155 @@ class TestAPI(TestAPIBase):
|
||||
mocked_client.list_ports.assert_called_once_with(
|
||||
tenant_id=uuids.fake, device_id=uuids.instance)
|
||||
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_physnet_tunneled_info',
|
||||
new=mock.Mock(return_value=(None, False)))
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_preexisting_port_ids',
|
||||
new=mock.Mock(return_value=[]))
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_subnets_from_port',
|
||||
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_floating_ips_by_fixed_and_port',
|
||||
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
|
||||
@mock.patch.object(neutronapi, 'get_client')
|
||||
def test_build_network_info_model_full_vnic_type_change(
|
||||
self, mock_get_client
|
||||
):
|
||||
mocked_client = mock.create_autospec(client.Client)
|
||||
mock_get_client.return_value = mocked_client
|
||||
fake_inst = objects.Instance()
|
||||
fake_inst.project_id = uuids.fake
|
||||
fake_inst.uuid = uuids.instance
|
||||
fake_ports = [
|
||||
{
|
||||
"id": "port1",
|
||||
"network_id": "net-id",
|
||||
"tenant_id": uuids.fake,
|
||||
"admin_state_up": True,
|
||||
"status": "ACTIVE",
|
||||
"fixed_ips": [{"ip_address": "1.1.1.1"}],
|
||||
"mac_address": "de:ad:be:ef:00:01",
|
||||
"binding:vif_type": model.VIF_TYPE_BRIDGE,
|
||||
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
|
||||
"binding:vif_details": {},
|
||||
},
|
||||
]
|
||||
mocked_client.list_ports.return_value = {'ports': fake_ports}
|
||||
fake_inst.info_cache = objects.InstanceInfoCache.new(
|
||||
self.context, uuids.instance)
|
||||
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
|
||||
|
||||
# build the network info first
|
||||
nw_infos = self.api._build_network_info_model(
|
||||
self.context,
|
||||
fake_inst,
|
||||
force_refresh=True,
|
||||
)
|
||||
|
||||
self.assertEqual(1, len(nw_infos))
|
||||
fake_inst.info_cache.network_info = nw_infos
|
||||
|
||||
# change the vnic_type of the port and rebuild the network info
|
||||
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
|
||||
with mock.patch(
|
||||
"nova.network.neutron.API._log_error_if_vnic_type_changed"
|
||||
) as mock_log:
|
||||
nw_infos = self.api._build_network_info_model(
|
||||
self.context,
|
||||
fake_inst,
|
||||
force_refresh=True,
|
||||
)
|
||||
|
||||
mock_log.assert_called_once_with(
|
||||
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
|
||||
self.assertEqual(1, len(nw_infos))
|
||||
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_physnet_tunneled_info',
|
||||
new=mock.Mock(return_value=(None, False)))
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_preexisting_port_ids',
|
||||
new=mock.Mock(return_value=[]))
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_subnets_from_port',
|
||||
new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')]))
|
||||
@mock.patch.object(
|
||||
neutronapi.API,
|
||||
'_get_floating_ips_by_fixed_and_port',
|
||||
new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}]))
|
||||
@mock.patch.object(neutronapi, 'get_client')
|
||||
def test_build_network_info_model_single_vnic_type_change(
|
||||
self, mock_get_client
|
||||
):
|
||||
mocked_client = mock.create_autospec(client.Client)
|
||||
mock_get_client.return_value = mocked_client
|
||||
fake_inst = objects.Instance()
|
||||
fake_inst.project_id = uuids.fake
|
||||
fake_inst.uuid = uuids.instance
|
||||
fake_ports = [
|
||||
{
|
||||
"id": "port1",
|
||||
"network_id": "net-id",
|
||||
"tenant_id": uuids.fake,
|
||||
"admin_state_up": True,
|
||||
"status": "ACTIVE",
|
||||
"fixed_ips": [{"ip_address": "1.1.1.1"}],
|
||||
"mac_address": "de:ad:be:ef:00:01",
|
||||
"binding:vif_type": model.VIF_TYPE_BRIDGE,
|
||||
"binding:vnic_type": model.VNIC_TYPE_DIRECT,
|
||||
"binding:vif_details": {},
|
||||
},
|
||||
]
|
||||
fake_nets = [
|
||||
{
|
||||
"id": "net-id",
|
||||
"name": "foo",
|
||||
"tenant_id": uuids.fake,
|
||||
}
|
||||
]
|
||||
mocked_client.list_ports.return_value = {'ports': fake_ports}
|
||||
fake_inst.info_cache = objects.InstanceInfoCache.new(
|
||||
self.context, uuids.instance)
|
||||
fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([])
|
||||
|
||||
# build the network info first
|
||||
nw_infos = self.api._build_network_info_model(
|
||||
self.context,
|
||||
fake_inst,
|
||||
fake_nets,
|
||||
[fake_ports[0]["id"]],
|
||||
refresh_vif_id=fake_ports[0]["id"],
|
||||
)
|
||||
|
||||
self.assertEqual(1, len(nw_infos))
|
||||
fake_inst.info_cache.network_info = nw_infos
|
||||
|
||||
# change the vnic_type of the port and rebuild the network info
|
||||
fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP
|
||||
with mock.patch(
|
||||
"nova.network.neutron.API._log_error_if_vnic_type_changed"
|
||||
) as mock_log:
|
||||
nw_infos = self.api._build_network_info_model(
|
||||
self.context,
|
||||
fake_inst,
|
||||
fake_nets,
|
||||
[fake_ports[0]["id"]],
|
||||
refresh_vif_id=fake_ports[0]["id"],
|
||||
)
|
||||
|
||||
mock_log.assert_called_once_with(
|
||||
fake_ports[0]["id"], "direct", "macvtap", fake_inst)
|
||||
self.assertEqual(1, len(nw_infos))
|
||||
|
||||
@mock.patch.object(neutronapi, 'get_client')
|
||||
def test_get_subnets_from_port(self, mock_get_client):
|
||||
mocked_client = mock.create_autospec(client.Client)
|
||||
|
@ -0,0 +1,9 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
`Bug #1981813 <https://bugs.launchpad.net/nova/+bug/1981813>`_: Now nova
|
||||
detects if the ``vnic_type`` of a bound port has been changed in neutron
|
||||
and leaves an ERROR message in the compute service log as such change on a
|
||||
bound port is not supported. Also the restart of the nova-compute service
|
||||
will not crash any more after such port change. Nova will log an ERROR and
|
||||
skip the initialization of the instance with such port during the startup.
|
Loading…
Reference in New Issue
Block a user