Network agent rebalance/reschedule config changes

In the unlikely event that the network agent
rebalance/reschedule state machines are moving a resource
from one agent to another, and something happens to the host
agent, VIM, or neutron during this small window of time, it
may be possible for a resource (dhcp network or router) to be
left unattached to an agent.  In order for these resources
to be recovered and reattached to an agent, this update
will turn the network_auto_schedule and router_auto_schedule
neutron config parameters back on.  These were originally
turned off in order to prevent potential race conditions with
the rebalance/reschedule algorithms, but after further
review this was not considered to be an issue. They are
therefore being turned back on.

In addition, during host unlocks the VIM was being restarted,
causing the rebalance/reschedule state machines to lose
configuration and potentially abort uncleanly mid-operation.
A change is made to only restart the VIM on initial
application apply.

Story: 652151
Task: 30561

Change-Id: I73a069e478200fa4ee9eb959eceecc2dc598eecc
Signed-off-by: Kevin Smith <kevin.smith@windriver.com>
This commit is contained in:
Kevin Smith 2019-04-17 09:41:02 -04:00
parent e1ac03dab8
commit 61f29db061
5 changed files with 14 additions and 16 deletions

View File

@ -1163,14 +1163,6 @@ data:
# Increase from default of 75 seconds to avoid agents being declared
# down during controller swacts, reboots, etc...
agent_down_time: 180
# Set to false so as to remove conflict with newly introduced
# network rebalancing/rescheduling that will move routers off
# down l3 agents, rebalance routers to newly up l3 agents.
router_auto_schedule: false
# Set to false so as to remove conflict with newly introduced
# network rebalancing/rescheduling that will move networks off
# down DHCP agents, rebalance networks to newly up DHCP agents.
network_auto_schedule: false
agent:
root_helper: sudo
vhost:

View File

@ -2490,7 +2490,7 @@ class HostController(rest.RestController):
db_app.progress = None
db_app.save()
pecan.request.rpcapi.perform_app_apply(
pecan.request.context, db_app)
pecan.request.context, db_app, False)
else:
LOG.info("%s system app is present but not applied, "
"skipping re-apply" % constants.HELM_APP_OPENSTACK)

View File

@ -282,11 +282,12 @@ class KubeAppController(rest.RestController):
raise wsme.exc.ClientSideError(_(
"Application-apply rejected: operation is not allowed "
"while the current status is {}.".format(db_app.status)))
app_not_already_applied = (db_app.status != constants.APP_APPLY_SUCCESS)
db_app.status = constants.APP_APPLY_IN_PROGRESS
db_app.progress = None
db_app.save()
pecan.request.rpcapi.perform_app_apply(pecan.request.context,
db_app)
db_app, app_not_already_applied)
return KubeApp.convert_with_links(db_app)
else:
if db_app.status not in [constants.APP_APPLY_SUCCESS,

View File

@ -10846,17 +10846,19 @@ class ConductorManager(service.PeriodicService):
"""
self._app.perform_app_upload(rpc_app, tarfile)
def perform_app_apply(self, context, rpc_app):
def perform_app_apply(self, context, rpc_app, app_not_already_applied):
"""Handling of application install request (via AppOperator)
:param context: request context.
:param rpc_app: data object provided in the rpc request
:param app_not_already_applied: app not yet successfully applied
"""
app_installed = self._app.perform_app_apply(rpc_app)
if app_installed:
if app_installed and app_not_already_applied:
# Update the VIM configuration as it may need to manage the newly
# installed application.
# installed application. Only do this if the application
# was not already applied.
self._update_vim_config(context)
return app_installed

View File

@ -1768,15 +1768,18 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
rpc_app=rpc_app,
tarfile=tarfile))
def perform_app_apply(self, context, rpc_app):
def perform_app_apply(self, context, rpc_app, app_not_already_applied):
"""Handle application apply request
:param context: request context.
:param rpc_app: data object provided in the rpc request
:param app_not_already_applied: app not already successfully applied
"""
return self.cast(context,
self.make_msg('perform_app_apply',
rpc_app=rpc_app))
self.make_msg(
'perform_app_apply',
rpc_app=rpc_app,
app_not_already_applied=app_not_already_applied))
def perform_app_remove(self, context, rpc_app):
"""Handle application remove request