NSX|V3: Change status code of SG failure

With multi-cluster, a reboot of one backend manager can cause a
race condition in which the FW section API returns 404 during SG
realization. This is a temporary outage, so we should return a 503
(Service Unavailable) status code to the user, rather than 404.

Change-Id: I7354703cd21b824d7e8a7b44b813c6799bf7f304
This commit is contained in:
Anna Khmelnitsky 2019-02-14 18:02:59 -08:00 committed by Adit Sarfaty
parent 2b8418926f
commit 39922fefc2
4 changed files with 49 additions and 12 deletions

View File

@ -23,6 +23,11 @@ class NsxPluginException(n_exc.NeutronException):
message = _("An unexpected error occurred in the NSX Plugin: %(err_msg)s")
class NsxPluginTemporaryError(n_exc.ServiceUnavailable):
    """Transient NSX plugin failure that the caller may retry later.

    Subclasses ``n_exc.ServiceUnavailable``. The message template expects
    an ``err_msg`` value describing the underlying failure.
    """

    message = _(
        "Temporary error occurred in the NSX Plugin: %(err_msg)s."
        " Please try again later"
    )
class ClientCertificateException(NsxPluginException):
message = _("Client certificate error: %(err_msg)s")

View File

@ -166,6 +166,8 @@ class NsxPluginV3Base(agentschedulers_db.AZDhcpAgentSchedulerDbMixin,
webob.exc.HTTPBadRequest,
nsx_exc.NsxENSPortSecurity:
webob.exc.HTTPBadRequest,
nsx_exc.NsxPluginTemporaryError:
webob.exc.HTTPServiceUnavailable
})
def _get_conf_attr(self, attr):

View File

@ -3226,18 +3226,26 @@ class NsxV3Plugin(nsx_plugin_common.NsxPluginV3Base,
context, firewall_section['id'], ns_group['id'],
logging, action, sg_rules)
self.save_security_group_rule_mappings(context, rules['rules'])
except nsx_lib_exc.ManagerError:
with excutils.save_and_reraise_exception():
LOG.exception("Failed to create backend firewall rules "
"for security-group %(name)s (%(id)s), "
"rolling back changes.", secgroup_db)
# default security group deletion requires admin context
if default_sg:
context = context.elevated()
super(NsxV3Plugin, self).delete_security_group(
context, secgroup_db['id'])
self.nsxlib.ns_group.delete(ns_group['id'])
self.nsxlib.firewall_section.delete(firewall_section['id'])
except nsx_lib_exc.ManagerError as ex:
msg = ("Failed to create backend firewall rules "
"for security-group %(name)s (%(id)s), "
"rolling back changes." % secgroup_db)
LOG.exception(msg)
# default security group deletion requires admin context
if default_sg:
context = context.elevated()
super(NsxV3Plugin, self).delete_security_group(
context, secgroup_db['id'])
self.nsxlib.ns_group.delete(ns_group['id'])
self.nsxlib.firewall_section.delete(firewall_section['id'])
if ex.__class__ is nsx_lib_exc.ResourceNotFound:
# This may happen due to a race condition during
# backend reboot. The exception raised should reflect a
# short-term availability issue (503) rather than 404
raise nsx_exc.NsxPluginTemporaryError(err_msg=msg)
else:
raise ex
return secgroup_db

View File

@ -22,6 +22,8 @@ from vmware_nsxlib import v3 as nsxlib
from vmware_nsxlib.v3 import exceptions as nsxlib_exc
from vmware_nsxlib.v3 import nsx_constants as consts
from webob import exc
# Pool of fake ns-groups uuids
NSG_IDS = ['11111111-1111-1111-1111-111111111111',
@ -84,6 +86,26 @@ class TestSecurityGroups(test_nsxv3.NsxV3PluginTestCaseMixin,
for k, v, in keys:
self.assertEqual(rule['security_group_rule'][k], v)
def test_create_security_group_with_manager_error(self):
    """Check that a backend 404 during SG creation is reported as 503.

    A reboot in a multi-cluster environment may cause temporary 404
    responses from the firewall section APIs. The user should receive
    503 (Service Unavailable) and not 404.
    """
    name = 'webservers'
    description = 'my webservers'
    # Simulate the backend race: rule creation on the firewall section
    # fails with ResourceNotFound.
    with mock.patch("vmware_nsxlib.v3.security.NsxLibFirewallSection."
                    "create_section_rules",
                    side_effect=nsxlib_exc.ResourceNotFound):
        try:
            with self.security_group(name, description):
                # assertRaises would not work with generators, so fail
                # explicitly if the creation unexpectedly succeeds.
                self.fail("Security group creation was expected to fail")
        except exc.HTTPClientError as err:
            # Catching the error alone would also accept a 404, which is
            # exactly what this test must rule out.
            # NOTE(review): presumably the security_group() helper raises
            # HTTPClientError carrying the real response status in
            # ``code`` -- confirm against the base test class.
            self.assertEqual(exc.HTTPServiceUnavailable.code, err.code)
class TestSecurityGroupsNoDynamicCriteria(test_nsxv3.NsxV3PluginTestCaseMixin,
test_ext_sg.TestSecurityGroups):