Revert "[HA] Do not add initial state change delay in HA router"
This reverts commitc20f2e5136
. The fix of bug #1945512 reintroduced bug #1837635 as after the initial backup state ha router can transition to 'primary' state on multiple hosts and due to this delay multiple routers get into 'active' ha_state even if one of the host quickly transition to backup after the primary state. The issue got visible since ha router fullstack tests are added as part of [1]. [1] https://review.opendev.org/c/openstack/neutron/+/917429 Related-Bug: #1837635 Related-Bug: #1945512 Related-Bug: #2083609 Change-Id: I83b53a07362861da98b8361dafd95e94e5048322 (cherry picked from commita3689956dd
) Conflicts: neutron/agent/l3/ha.py neutron/tests/unit/agent/l3/test_agent.py
This commit is contained in:
parent
b962109dff
commit
549af9e07c
@ -17,9 +17,6 @@ import os
|
|||||||
import threading
|
import threading
|
||||||
|
|
||||||
import eventlet
|
import eventlet
|
||||||
from neutron_lib.callbacks import events
|
|
||||||
from neutron_lib.callbacks import registry
|
|
||||||
from neutron_lib.callbacks import resources
|
|
||||||
from neutron_lib import constants
|
from neutron_lib import constants
|
||||||
from oslo_log import log as logging
|
from oslo_log import log as logging
|
||||||
from oslo_utils import fileutils
|
from oslo_utils import fileutils
|
||||||
@ -79,7 +76,6 @@ class L3AgentKeepalivedStateChangeServer(object):
|
|||||||
server.wait()
|
server.wait()
|
||||||
|
|
||||||
|
|
||||||
@registry.has_registry_receivers
|
|
||||||
class AgentMixin(object):
|
class AgentMixin(object):
|
||||||
def __init__(self, host):
|
def __init__(self, host):
|
||||||
self._init_ha_conf_path()
|
self._init_ha_conf_path()
|
||||||
@ -91,13 +87,6 @@ class AgentMixin(object):
|
|||||||
eventlet.spawn(self._start_keepalived_notifications_server)
|
eventlet.spawn(self._start_keepalived_notifications_server)
|
||||||
self._transition_states = {}
|
self._transition_states = {}
|
||||||
self._transition_state_mutex = threading.Lock()
|
self._transition_state_mutex = threading.Lock()
|
||||||
self._initial_state_change_per_router = set()
|
|
||||||
|
|
||||||
def initial_state_change(self, router_id):
|
|
||||||
initial_state = router_id not in self._initial_state_change_per_router
|
|
||||||
if initial_state:
|
|
||||||
self._initial_state_change_per_router.add(router_id)
|
|
||||||
return initial_state
|
|
||||||
|
|
||||||
def _get_router_info(self, router_id):
|
def _get_router_info(self, router_id):
|
||||||
try:
|
try:
|
||||||
@ -106,13 +95,6 @@ class AgentMixin(object):
|
|||||||
LOG.info('Router %s is not managed by this agent. It was '
|
LOG.info('Router %s is not managed by this agent. It was '
|
||||||
'possibly deleted concurrently.', router_id)
|
'possibly deleted concurrently.', router_id)
|
||||||
|
|
||||||
@registry.receives(resources.ROUTER, [events.AFTER_DELETE])
|
|
||||||
def _delete_router(self, resource, event, trigger, payload):
|
|
||||||
try:
|
|
||||||
self._initial_state_change_per_router.remove(payload.resource_id)
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def check_ha_state_for_router(self, router_id, current_state):
|
def check_ha_state_for_router(self, router_id, current_state):
|
||||||
ri = self._get_router_info(router_id)
|
ri = self._get_router_info(router_id)
|
||||||
if not ri:
|
if not ri:
|
||||||
@ -166,7 +148,7 @@ class AgentMixin(object):
|
|||||||
|
|
||||||
def _enqueue_state_change(self, router_id, state):
|
def _enqueue_state_change(self, router_id, state):
|
||||||
# NOTE(ralonsoh): move 'primary' and 'backup' constants to n-lib
|
# NOTE(ralonsoh): move 'primary' and 'backup' constants to n-lib
|
||||||
if state == 'primary' and not self.initial_state_change(router_id):
|
if state == 'primary':
|
||||||
eventlet.sleep(self.conf.ha_vrrp_advert_int)
|
eventlet.sleep(self.conf.ha_vrrp_advert_int)
|
||||||
transition_state = self._update_transition_state(router_id)
|
transition_state = self._update_transition_state(router_id)
|
||||||
if transition_state != state:
|
if transition_state != state:
|
||||||
|
@ -274,15 +274,7 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
|||||||
self._enqueue_state_change_transitions(['backup'], 1)
|
self._enqueue_state_change_transitions(['backup'], 1)
|
||||||
|
|
||||||
def test_enqueue_state_change_from_none_to_primary_to_backup(self):
|
def test_enqueue_state_change_from_none_to_primary_to_backup(self):
|
||||||
# First transition (to primary), won't have a delay.
|
self._enqueue_state_change_transitions(['primary', 'backup'], 0)
|
||||||
self._enqueue_state_change_transitions(['primary', 'backup'], 2)
|
|
||||||
|
|
||||||
def test_enqueue_state_change_from_none_to_primary_to_backup_twice(self):
|
|
||||||
# Second transition to primary will have a delay and will be overridden
|
|
||||||
# by the second transition to backup; that means the transition from
|
|
||||||
# backup (second state) to primary (third state) is dismissed.
|
|
||||||
self._enqueue_state_change_transitions(
|
|
||||||
['primary', 'backup', 'primary', 'backup'], 2)
|
|
||||||
|
|
||||||
def test_enqueue_state_change_from_none_to_backup_to_primary(self):
|
def test_enqueue_state_change_from_none_to_backup_to_primary(self):
|
||||||
self._enqueue_state_change_transitions(['backup', 'primary'], 2)
|
self._enqueue_state_change_transitions(['backup', 'primary'], 2)
|
||||||
@ -2630,21 +2622,6 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
|||||||
agent.router_removed_from_agent(None, {'router_id': FAKE_ID})
|
agent.router_removed_from_agent(None, {'router_id': FAKE_ID})
|
||||||
self.assertEqual(1, agent._queue.add.call_count)
|
self.assertEqual(1, agent._queue.add.call_count)
|
||||||
|
|
||||||
@mock.patch.object(metadata_driver.MetadataDriver,
|
|
||||||
'destroy_monitored_metadata_proxy')
|
|
||||||
def test__router_removed(self, *args):
|
|
||||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
|
||||||
ri = mock.Mock(router={'id': 'router_id'})
|
|
||||||
agent._initial_state_change_per_router.add('router_id')
|
|
||||||
self.assertEqual({'router_id'}, agent._initial_state_change_per_router)
|
|
||||||
for _ in range(2):
|
|
||||||
# The second call will trigger a KeyError exception in
|
|
||||||
# AgentMixin._delete_router that should be dismissed.
|
|
||||||
agent.router_info['router_id'] = mock.ANY
|
|
||||||
agent.pd = mock.Mock(routers={'router_id': {'subnets': []}})
|
|
||||||
agent._router_removed(ri, 'router_id')
|
|
||||||
self.assertEqual(set([]), agent._initial_state_change_per_router)
|
|
||||||
|
|
||||||
def test_added_to_agent(self):
|
def test_added_to_agent(self):
|
||||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||||
agent._queue = mock.Mock()
|
agent._queue = mock.Mock()
|
||||||
|
Loading…
Reference in New Issue
Block a user