From 291048aba2fcaec8afee3011fb4c12c6ca0ebbba Mon Sep 17 00:00:00 2001 From: armando-migliaccio Date: Fri, 1 Nov 2013 15:47:22 -0700 Subject: [PATCH] Tune up report and downtime intervals for l2 agent If the neutron server erroneously thinks that the l2 agent is down it will fail to bind a port, which can lead to VM's spawn errors. However, the issue is only transient because the agent effectively is only 'late' in reporting back. Best solution would be an alpha-count algorithm (so that we can detect persistent failures more reliably), but for now let's be more tolerant assuming that the agent is down by waiting at least twice the report interval plus a tiny teeny bit. Change-Id: I544135ce1f6b7eaefb34ac44af8f5844d92ddd95 Close-bug: #1244255 --- etc/neutron.conf | 9 +++++---- neutron/agent/common/config.py | 4 +++- neutron/db/agents_db.py | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/etc/neutron.conf b/etc/neutron.conf index 5c31cb2f318..83a6dbe23de 100644 --- a/etc/neutron.conf +++ b/etc/neutron.conf @@ -211,8 +211,9 @@ notification_driver = neutron.openstack.common.notifier.rpc_notifier # max_fixed_ips_per_port = 5 # =========== items for agent management extension ============= -# Seconds to regard the agent as down. 
-# agent_down_time = 5 +# Seconds to regard the agent as down; should be at least twice +# report_interval, to be sure the agent is down for good +# agent_down_time = 9 # =========== end of items for agent management extension ===== # =========== items for agent scheduler extension ============= @@ -301,8 +302,8 @@ notification_driver = neutron.openstack.common.notifier.rpc_notifier # root_helper = sudo # =========== items for agent management extension ============= -# seconds between nodes reporting state to server, should be less than -# agent_down_time +# seconds between nodes reporting state to server; should be less than +# agent_down_time, best if it is half or less than agent_down_time # report_interval = 4 # =========== end of items for agent management extension ===== diff --git a/neutron/agent/common/config.py b/neutron/agent/common/config.py index e00d43cc972..9d5f5b1c48e 100644 --- a/neutron/agent/common/config.py +++ b/neutron/agent/common/config.py @@ -33,7 +33,9 @@ ROOT_HELPER_OPTS = [ AGENT_STATE_OPTS = [ cfg.FloatOpt('report_interval', default=4, - help=_('Seconds between nodes reporting state to server')), + help=_('Seconds between nodes reporting state to server; ' + 'should be less than agent_down_time, best if it ' + 'is half or less than agent_down_time.')), ] diff --git a/neutron/db/agents_db.py b/neutron/db/agents_db.py index d04f7faf56b..e095a4c2a74 100644 --- a/neutron/db/agents_db.py +++ b/neutron/db/agents_db.py @@ -31,8 +31,10 @@ from neutron.openstack.common import timeutils LOG = logging.getLogger(__name__) cfg.CONF.register_opt( - cfg.IntOpt('agent_down_time', default=5, - help=_("Seconds to regard the agent is down."))) + cfg.IntOpt('agent_down_time', default=9, + help=_("Seconds to regard the agent is down; should be at " + "least twice report_interval, to be sure the " + "agent is down for good."))) class Agent(model_base.BASEV2, models_v2.HasId):