diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py index 9927767997c..d104f767beb 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/maintenance.py @@ -1030,6 +1030,63 @@ class DBInconsistenciesPeriodics(SchemaAwarePeriodicsBase): from_reload=True) raise periodics.NeverAgain() + @periodics.periodic(spacing=300, run_immediately=True) + def remove_duplicated_chassis_registers(self): + """Remove the "Chassis" and "Chassis_Private" duplicated registers. + + When the ovn-controller service of a node is updated and the system-id + is changed, if the old service is not stopped gracefully, it will leave + a "Chassis" and a "Chassis_Private" registers on the OVN SB database. + These leftovers must be removed. + + NOTE: this method is executed every 5 minutes. If a new chassis is + added, this method will perform again the clean-up process. + + NOTE: this method can be executed only if the OVN SB has the + "Chassis_Private" table. Otherwise, is not possible to find out which + register is newer and thus must be kept in the database. + """ + if not self._sb_idl.is_table_present('Chassis_Private'): + raise periodics.NeverAgain() + + if not self.has_lock: + return + + # dup_chassis_port_host = {host_name: [(ch1, ch_private1), + # (ch2, ch_private2), ... ]} + dup_chassis_port_host = {} + chassis = self._sb_idl.chassis_list().execute(check_error=True) + chassis_hostnames = {ch.hostname for ch in chassis} + # Find the duplicated "Chassis" and "Chassis_Private" registers, + # comparing the hostname. + for hostname in chassis_hostnames: + ch_list = [] + # Find these chassis matching the hostname and create a list. + for ch in (ch for ch in chassis if ch.hostname == hostname): + ch_private = self._sb_idl.lookup('Chassis_Private', ch.name, + default=None) + if ch_private: + ch_list.append((ch, ch_private)) + + # If the chassis list > 1, then we have duplicated chassis. + if len(ch_list) > 1: + # Order ch_list by Chassis_Private.nb_cfg_timestamp, from newer + # (greater value) to older. + ch_list.sort(key=lambda x: x[1].nb_cfg_timestamp, reverse=True) + dup_chassis_port_host[hostname] = ch_list + + if not dup_chassis_port_host: + return + + # Remove the "Chassis" and "Chassis_Private" registers with the + # older Chassis_Private.nb_cfg_timestamp. + with self._sb_idl.transaction(check_error=True) as txn: + for ch_list in dup_chassis_port_host.values(): + # The first item is skipped, this is the newest element. + for ch, ch_private in ch_list[1:]: + for table in ('Chassis_Private', 'Chassis'): + txn.add(self._sb_idl.db_destroy(table, ch.name)) + class HashRingHealthCheckPeriodics(object): diff --git a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py index 256e47a5c06..c4a8e226cf3 100644 --- a/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py +++ b/neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_maintenance.py @@ -972,6 +972,64 @@ class TestMaintenance(_TestMaintenanceHelper): lsp = self.nb_api.lookup('Logical_Switch_Port', p1['id']) self.assertEqual(hcg_uuid, lsp.ha_chassis_group[0].uuid) + def test_remove_duplicated_chassis_registers(self): + hostnames = ['host1', 'host2'] + for hostname in hostnames: + for _ in range(3): + self.add_fake_chassis(hostname) + + chassis = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(6, len(chassis)) + # Make the chassis private timestamp different + for idx, ch in enumerate(chassis): + self.sb_api.db_set('Chassis_Private', ch.name, + ('nb_cfg_timestamp', idx)).execute() + + ch_private_dict = {} # host: [ch_private1, ch_private2, ...] + for hostname in hostnames: + ch_private_list = [] + for ch in (ch for ch in chassis if ch.hostname == hostname): + ch_private = self.sb_api.lookup('Chassis_Private', ch.name, + default=None) + if ch_private: + # One of the "Chassis_Private" has been deleted on purpose + # in this test. + ch_private_list.append(ch_private) + ch_private_list.sort(key=lambda x: x.nb_cfg_timestamp, + reverse=True) + ch_private_dict[hostname] = ch_private_list + + self.maint.remove_duplicated_chassis_registers() + chassis_result = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(2, len(chassis_result)) + for ch in chassis_result: + self.assertIn(ch.hostname, hostnames) + hostnames.remove(ch.hostname) + # From ch_private_dict[ch.hostname], we retrieve the first + # "Chassis_Private" register because these are ordered by + # timestamp. The newer one (bigger timestamp) should remain in the + # system. + ch_expected = ch_private_dict[ch.hostname][0].chassis[0] + self.assertEqual(ch_expected.name, ch.name) + + def test_remove_duplicated_chassis_registers_no_ch_private_register(self): + for _ in range(2): + self.add_fake_chassis('host1') + + chassis = self.sb_api.chassis_list().execute(check_error=True) + self.assertEqual(2, len(chassis)) + # Make the chassis private timestamp different + # Delete on of the "Chassis_Private" registers. + self.sb_api.db_destroy('Chassis_Private', chassis[0].name).execute() + self.sb_api.db_set('Chassis_Private', chassis[1].name, + ('nb_cfg_timestamp', 1)).execute() + + self.maint.remove_duplicated_chassis_registers() + chassis_result = self.sb_api.chassis_list().execute(check_error=True) + # Both "Chassis" registers are still in the DB because one + # "Chassis_Private" register was missing. + self.assertEqual(2, len(chassis_result)) + class TestLogMaintenance(_TestMaintenanceHelper, test_log_driver.LogApiTestCaseBase): diff --git a/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml b/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml new file mode 100644 index 00000000000..cf021363d4a --- /dev/null +++ b/releasenotes/notes/remove_duplicated_ovn_chassis-df12fb6233ea3d3e.yaml @@ -0,0 +1,17 @@ +--- +issues: + - | + When using ML2/OVN, during an upgrade procedure, the OVS system-id stored + value can be changed. The ovn-controller service will create the "Chassis" + and "Chassis_Private" registers based on this OVS system-id. If the + ovn-controller process is not gracefully stopped, that could lead to the + existence of duplicated "Chassis" and "Chassis_Private" registers in the + OVN Southbound database. +fixes: + - | + A new OVN maintenance method ``remove_duplicated_chassis_registers`` is + added. This method will periodically check the OVN Southbound "Chassis" + and "Chassis_Private" tables looking for duplicated registers. The older + ones (based on the "Chassis_Private.nb_cfg_timestamp" value) will be + removed when more than one register has the same hostname, that should + be unique.