N3000 FEC device config does not recover on host re-install
The problem was detected on the port update: the reports generated before the worker node configuration erase the SRIOV parameters obtained from the previous installation. Since the reinstall is done in a locked state, the validation between the number of VFs and the number of PCI addresses fails because this update is done from an unconfigured state, and the node cannot finish the configuration. The correction consists of transporting the value of worker_config_completed back to the conductor and using it to avoid SRIOV parameter updates from an unconfigured SRIOV port. During the stand-by controller reinstallation, the active controller might collect inventory data prior to the FPGA reset. This reset might relocate the device's PCI addresses, creating invalid entries in the active database due to the initial inventory report. The correction consists of transporting the N3000 reset state back to the conductor and using this information to decide whether the entry will be incorporated into the database (true if the reset was executed). Closes-bug: 1929301 Signed-off-by: Andre Fernando Zanella Kantek <AndreFernandoZanella.Kantek@windriver.com> Change-Id: Ie3db6f4b13abc905ff533660196e7935239fc6fb
This commit is contained in:
parent
9ef262a5a7
commit
4cf6aa9344
|
@ -69,6 +69,7 @@ from sysinv.openstack.common import periodic_task
|
|||
from sysinv.openstack.common.rpc.common import Timeout
|
||||
from sysinv.openstack.common.rpc.common import serialize_remote_exception
|
||||
from sysinv.openstack.common.rpc.common import RemoteError
|
||||
from sysinv.fpga_agent import constants as fpga_constants
|
||||
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
|
@ -660,12 +661,16 @@ class AgentManager(service.PeriodicService):
|
|||
'speed': port.speed,
|
||||
'link_mode': port.link_mode,
|
||||
'dev_id': port.dev_id,
|
||||
'dpdksupport': port.dpdksupport}
|
||||
'dpdksupport': port.dpdksupport,
|
||||
'worker_config_completed': worker_config_completed}
|
||||
|
||||
LOG.debug('Sysinv Agent inic {}'.format(inic_dict))
|
||||
|
||||
port_list.append(inic_dict)
|
||||
|
||||
is_fpga_n3000_reset = \
|
||||
os.path.exists(fpga_constants.N3000_RESET_FLAG)
|
||||
|
||||
for dev in pci_devs:
|
||||
pci_dev_dict = {'name': dev.name,
|
||||
'pciaddr': dev.pci.pciaddr,
|
||||
|
@ -686,7 +691,8 @@ class AgentManager(service.PeriodicService):
|
|||
'sriov_vf_pdevice_id': dev.sriov_vf_pdevice_id,
|
||||
'driver': dev.driver,
|
||||
'enabled': dev.enabled,
|
||||
'extra_info': dev.extra_info}
|
||||
'extra_info': dev.extra_info,
|
||||
'fpga_n3000_reset': is_fpga_n3000_reset}
|
||||
LOG.debug('Sysinv Agent dev {}'.format(pci_dev_dict))
|
||||
|
||||
pci_device_list.append(pci_dev_dict)
|
||||
|
|
|
@ -2309,18 +2309,19 @@ class ConductorManager(service.PeriodicService):
|
|||
try:
|
||||
port_dict = {
|
||||
'sriov_totalvfs': inic['sriov_totalvfs'],
|
||||
'sriov_numvfs': inic['sriov_numvfs'],
|
||||
'sriov_vfs_pci_address':
|
||||
inic['sriov_vfs_pci_address'],
|
||||
'sriov_vf_driver':
|
||||
inic['sriov_vf_driver'],
|
||||
'sriov_vf_pdevice_id':
|
||||
inic['sriov_vf_pdevice_id'],
|
||||
'driver': inic['driver'],
|
||||
'dpdksupport': inic['dpdksupport'],
|
||||
'speed': inic['speed'],
|
||||
}
|
||||
|
||||
# this data can only be updated if the node has its initial
|
||||
# configuration done
|
||||
if inic['worker_config_completed'] is True:
|
||||
port_dict['sriov_numvfs'] = inic['sriov_numvfs']
|
||||
port_dict['sriov_vfs_pci_address'] = inic['sriov_vfs_pci_address']
|
||||
port_dict['sriov_vf_driver'] = inic['sriov_vf_driver']
|
||||
port_dict['sriov_vf_pdevice_id'] = inic['sriov_vf_pdevice_id']
|
||||
|
||||
LOG.info("port %s update attr: %s" %
|
||||
(port.uuid, port_dict))
|
||||
self.dbapi.ethernet_port_update(port.uuid, port_dict)
|
||||
|
@ -2763,6 +2764,12 @@ class ConductorManager(service.PeriodicService):
|
|||
return
|
||||
for pci_dev in pci_device_dict_array:
|
||||
LOG.debug("Processing dev %s" % pci_dev)
|
||||
is_n3000_dev_not_reset = False
|
||||
if 'fpga_n3000_reset' in pci_dev.keys():
|
||||
is_n3000_dev_not_reset = (pci_dev['pdevice_id'] in fpga_constants.N3000_DEVICES
|
||||
and pci_dev['pvendor_id'] == fpga_constants.N3000_VENDOR
|
||||
and not pci_dev['fpga_n3000_reset'])
|
||||
del pci_dev['fpga_n3000_reset']
|
||||
try:
|
||||
pci_dev_dict = {'host_id': host['id']}
|
||||
pci_dev_dict.update(pci_dev)
|
||||
|
@ -2772,11 +2779,19 @@ class ConductorManager(service.PeriodicService):
|
|||
hostid=host['id'])
|
||||
dev_found = dev
|
||||
if not dev:
|
||||
if is_n3000_dev_not_reset:
|
||||
LOG.info("N3000 reset not executed, skip for dev="
|
||||
"%s on host %s" % (pci_dev_dict, host['id']))
|
||||
continue
|
||||
LOG.info("Attempting to create new device "
|
||||
"%s on host %s" % (pci_dev_dict, host['id']))
|
||||
dev = self.dbapi.pci_device_create(host['id'],
|
||||
pci_dev_dict)
|
||||
except Exception:
|
||||
if is_n3000_dev_not_reset:
|
||||
LOG.info("N3000 reset not executed, skip for dev="
|
||||
"%s on host %s" % (pci_dev_dict, host['id']))
|
||||
continue
|
||||
LOG.info("Attempting to create new device "
|
||||
"%s on host %s" % (pci_dev_dict, host['id']))
|
||||
dev = self.dbapi.pci_device_create(host['id'],
|
||||
|
@ -2817,6 +2832,10 @@ class ConductorManager(service.PeriodicService):
|
|||
# binding of the intended driver has not had a
|
||||
# chance to be applied.
|
||||
del attr['sriov_vf_driver']
|
||||
if is_n3000_dev_not_reset:
|
||||
LOG.info("N3000 reset not executed, skip for dev="
|
||||
"%s on host %s" % (pci_dev_dict, host['id']))
|
||||
continue
|
||||
dev = self.dbapi.pci_device_update(dev['uuid'], attr)
|
||||
except Exception:
|
||||
LOG.exception("Failed to update port %s" %
|
||||
|
|
|
@ -30,3 +30,5 @@ OPAE_IMG = "registry.local:9001/docker.io/starlingx/n3000-opae:stx.4.0-v1.0.0"
|
|||
# This is a flag file created by puppet after doing a "docker login".
|
||||
# We need to wait for it to exist before trying to run docker images.
|
||||
DOCKER_LOGIN_FLAG = "/var/run/docker_login_done"
|
||||
|
||||
N3000_RESET_FLAG = "/var/run/.sysinv_n3000_reset"
|
||||
|
|
|
@ -27,7 +27,7 @@ from sysinv.agent.pci import PCIOperator
|
|||
from sysinv.agent.pci import PCI
|
||||
from sysinv.agent.manager import AgentManager
|
||||
from sysinv.tests import base
|
||||
|
||||
from sysinv.fpga_agent import constants as fpga_constants
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
FAKE_LSPCI_OUTPUT = {
|
||||
|
@ -234,6 +234,26 @@ class TestAgentOperator(base.TestCase):
|
|||
mock_exists.side_effect = file_exists_side_effect
|
||||
|
||||
ports, devices, macs = self._get_ports_inventory()
|
||||
for dev in devices:
|
||||
assert dev['fpga_n3000_reset'] is False
|
||||
assert len(ports) == 1
|
||||
assert len(devices) == 1
|
||||
assert len(macs) == 1
|
||||
|
||||
@mock.patch('os.path.exists')
|
||||
def test_get_pci_inventory_n3000_reset_flag(self, mock_exists):
|
||||
def file_exists_side_effect(filename):
|
||||
if filename in [tsc.INITIAL_WORKER_CONFIG_COMPLETE,
|
||||
tsc.VOLATILE_WORKER_CONFIG_COMPLETE,
|
||||
fpga_constants.N3000_RESET_FLAG]:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
mock_exists.side_effect = file_exists_side_effect
|
||||
|
||||
ports, devices, macs = self._get_ports_inventory()
|
||||
for dev in devices:
|
||||
assert dev['fpga_n3000_reset'] is True
|
||||
assert len(ports) == 1
|
||||
assert len(devices) == 1
|
||||
assert len(macs) == 1
|
||||
|
|
|
@ -1732,6 +1732,127 @@ class ManagerTestCase(base.DbTestCase):
|
|||
dev = self.dbapi.pci_device_get(PCI_DEV_2['pciaddr'], host_id)
|
||||
self.assertEqual(dev['uuid'], PCI_DEV_2['uuid'])
|
||||
|
||||
def test_pci_device_update_n3000_by_host(self):
|
||||
# Create compute-0 node
|
||||
config_uuid = str(uuid.uuid4())
|
||||
ihost = self._create_test_ihost(
|
||||
personality=constants.WORKER,
|
||||
hostname='compute-0',
|
||||
uuid=str(uuid.uuid4()),
|
||||
config_status=None,
|
||||
config_applied=config_uuid,
|
||||
config_target=config_uuid,
|
||||
invprovision=constants.PROVISIONED,
|
||||
administrative=constants.ADMIN_UNLOCKED,
|
||||
operational=constants.OPERATIONAL_ENABLED,
|
||||
availability=constants.AVAILABILITY_ONLINE,
|
||||
)
|
||||
host_uuid = ihost['uuid']
|
||||
host_id = ihost['id']
|
||||
PCI_DEV_1 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_dev_1',
|
||||
'pciaddr': '0000:0b:01.0',
|
||||
'pclass_id': '060100',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0443',
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': True} # is the FPGA reset
|
||||
PCI_DEV_2 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_0000_b4_00_0',
|
||||
'pciaddr': '0000:b4:00.0',
|
||||
'pclass_id': '120000',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0d8f', # N3000 FEC
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': True} # is the FPGA reset
|
||||
|
||||
pci_device_dict_array = [PCI_DEV_1, PCI_DEV_2]
|
||||
|
||||
# create new dev
|
||||
self.service.pci_device_update_by_host(self.context, host_uuid, pci_device_dict_array)
|
||||
|
||||
dev = self.dbapi.pci_device_get(PCI_DEV_1['pciaddr'], host_id)
|
||||
for key in PCI_DEV_1:
|
||||
self.assertEqual(dev[key], PCI_DEV_1[key])
|
||||
|
||||
dev = self.dbapi.pci_device_get(PCI_DEV_2['pciaddr'], host_id)
|
||||
for key in PCI_DEV_2:
|
||||
self.assertEqual(dev[key], PCI_DEV_2[key])
|
||||
|
||||
# test with fpga_n3000_reset as False
|
||||
PCI_DEV_3 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_dev_3',
|
||||
'pciaddr': '0000:0c:01.0',
|
||||
'pclass_id': '060100',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0443',
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': False} # is the FPGA reset
|
||||
PCI_DEV_4 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_0000_b8_00_0',
|
||||
'pciaddr': '0000:b8:00.0',
|
||||
'pclass_id': '120000',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0d8f', # N3000_FEC_PF_DEVICE
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': False} # is the FPGA reset
|
||||
PCI_DEV_5 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_0000_b9_00_0',
|
||||
'pciaddr': '0000:b9:00.0',
|
||||
'pclass_id': '120000',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0b30', # N3000_DEVICE
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': False} # is the FPGA reset
|
||||
PCI_DEV_6 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_0000_b0_00_0',
|
||||
'pciaddr': '0000:b0:00.0',
|
||||
'pclass_id': '120000',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0b32', # N3000_DEFAULT_DEVICE
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': False} # is the FPGA reset
|
||||
|
||||
pci_device_dict_array2 = [PCI_DEV_3, PCI_DEV_4, PCI_DEV_5, PCI_DEV_6]
|
||||
|
||||
self.service.pci_device_update_by_host(self.context, host_uuid, pci_device_dict_array2)
|
||||
|
||||
dev = self.dbapi.pci_device_get(PCI_DEV_3['pciaddr'], host_id)
|
||||
for key in PCI_DEV_3:
|
||||
self.assertEqual(dev[key], PCI_DEV_3[key])
|
||||
|
||||
self.assertRaises(exception.ServerNotFound, self.dbapi.pci_device_get, PCI_DEV_4['pciaddr'], host_id)
|
||||
self.assertRaises(exception.ServerNotFound, self.dbapi.pci_device_get, PCI_DEV_5['pciaddr'], host_id)
|
||||
self.assertRaises(exception.ServerNotFound, self.dbapi.pci_device_get, PCI_DEV_6['pciaddr'], host_id)
|
||||
|
||||
# update existing dev
|
||||
pci_dev_dict_update = [{'pciaddr': PCI_DEV_2['pciaddr'],
|
||||
'pclass_id': '060500',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0d8f',
|
||||
'pclass': '0600',
|
||||
'pvendor': '',
|
||||
'psvendor': '',
|
||||
'psdevice': 'qat',
|
||||
'sriov_totalvfs': 32,
|
||||
'sriov_numvfs': 4,
|
||||
'sriov_vf_driver': 'vfio-pci',
|
||||
'sriov_vf_pdevice_id': '0d90',
|
||||
'sriov_vfs_pci_address': '000:b4:00.1,0000:b4:00.2,0000:b4:00.3,0000:b4:00.4',
|
||||
'driver': 'igb_uio',
|
||||
'fpga_n3000_reset': True}]
|
||||
self.service.pci_device_update_by_host(self.context, host_uuid, pci_dev_dict_update)
|
||||
dev = self.dbapi.pci_device_get(PCI_DEV_2['pciaddr'], host_id)
|
||||
|
||||
for key in pci_dev_dict_update[0]:
|
||||
self.assertEqual(dev[key], pci_dev_dict_update[0][key])
|
||||
|
||||
pci_dev_dict_update[0]['sriov_vfs_pci_address'] = ''
|
||||
pci_dev_dict_update[0]['fpga_n3000_reset'] = False
|
||||
self.service.pci_device_update_by_host(self.context, host_uuid, pci_dev_dict_update)
|
||||
dev = self.dbapi.pci_device_get(PCI_DEV_2['pciaddr'], host_id)
|
||||
self.assertNotEqual(dev['sriov_vfs_pci_address'], pci_dev_dict_update[0]['sriov_vfs_pci_address'])
|
||||
|
||||
def test_inumas_update_by_ihost(self):
|
||||
# Create compute-0 node
|
||||
config_uuid = str(uuid.uuid4())
|
||||
|
@ -1784,14 +1905,16 @@ class ManagerTestCase(base.DbTestCase):
|
|||
'pclass_id': '060100',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0443',
|
||||
'enabled': True}
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': True}
|
||||
PCI_DEV_2 = {'uuid': str(uuid.uuid4()),
|
||||
'name': 'pci_dev_2',
|
||||
'pciaddr': '0000:0c:01.0',
|
||||
'pclass_id': '012000',
|
||||
'pvendor_id': '8086',
|
||||
'pdevice_id': '0b30',
|
||||
'enabled': True}
|
||||
'enabled': True,
|
||||
'fpga_n3000_reset': True}
|
||||
pci_device_dict_array = [PCI_DEV_1, PCI_DEV_2]
|
||||
|
||||
# create new PCI dev
|
||||
|
|
Loading…
Reference in New Issue