Add disable-mlockall config

By default, mlockall() is enabled for ovs-vswitchd. This results in
locking all of ovs-vswitchd's process memory into physical RAM and
prevents paging. This enables network performance but can lead to
memory exhaustion in memory-constrained environments. To disable
mlockall(), the disable-mlockall charm config option can be set to
True. If unset, disable-mlockall charm config will result in
disabling mlockall if running in a container.

The drop_config.append(OVS_DEFAULT) logic is no longer used as
it prevents a rewrite of the config template when charm config is
reset. For example, the new behavior results in
/etc/default/openvswitch-switch being written with comments only
when the corresponding config options are disabled (see template),
resulting in openvswitch-switch being restarted.

Due to the removal of drop_config.append(OVS_DEFAULT), pause/resume
actions need to explicitly remove openswitch-switch to maintain
prior behavior for non-DPDK deployments. In other words, pause/resume
will not restart openvswitch-switch.

Closes-Bug: #1906280
Related-Bug: #1908615
Change-Id: I2e3153e90c7a4a1b7dec7d6df427b33a449f414d
This commit is contained in:
Corey Bryant 2021-01-05 14:42:44 +00:00
parent cb292d8158
commit 282f6af3db
9 changed files with 125 additions and 29 deletions

View File

@ -19,6 +19,10 @@ cleanup:
required: required:
- i-really-mean-it - i-really-mean-it
pause: pause:
description: Pause the neutron-openvswitch unit. This action will stop neutron-openvswitch services. description: |
Pause the neutron-openvswitch unit. This action will stop neutron-openvswitch services.
.
NOTE: Pausing neutron-openvswitch does not stop the openvswitch-switch service.
resume: resume:
descrpition: Resume the neutron-openvswitch unit. This action will start neutron-openvswitch services. descrpition: |
Resume the neutron-openvswitch unit. This action will start neutron-openvswitch services.

View File

@ -28,16 +28,18 @@ from neutron_ovs_utils import (
def pause(args): def pause(args):
"""Pause the Ceilometer services. """Pause the neutron-openvswitch services.
@raises Exception should the service fail to stop. @raises Exception should the service fail to stop.
""" """
pause_unit_helper(register_configs()) pause_unit_helper(register_configs(),
exclude_services=['openvswitch-switch'])
def resume(args): def resume(args):
"""Resume the Ceilometer services. """Resume the neutron-openvswitch services.
@raises Exception should the service fail to start.""" @raises Exception should the service fail to start."""
resume_unit_helper(register_configs()) resume_unit_helper(register_configs(),
exclude_services=['openvswitch-switch'])
# A dictionary of all the defined actions to callables (which take # A dictionary of all the defined actions to callables (which take

View File

@ -410,3 +410,22 @@ options:
The inactivity_probe interval in seconds for the local switch connection The inactivity_probe interval in seconds for the local switch connection
to the controller. A value of 0 disables inactivity probes. Used only to the controller. A value of 0 disables inactivity probes. Used only
for 'native' driver. The default value is 10 seconds. for 'native' driver. The default value is 10 seconds.
disable-mlockall:
type: boolean
default:
description: |
Disable Open vSwitch use of mlockall().
.
When mlockall() is enabled, all of ovs-vswitchd's process memory is
locked into physical RAM and prevented from paging. This avoids network
interruptions but can lead to memory exhaustion in memory-constrained
environments.
.
By default, the charm will disable mlockall() if it is running in a
container. Otherwise, the charm will default to mlockall() enabled if
it is not running in a container.
.
Changing this config option will restart openvswitch-switch, resulting
in an expected data plane outage while the service restarts.
.
(Available from Mitaka)

View File

@ -28,6 +28,7 @@ from charmhelpers.core.hookenv import (
) )
from charmhelpers.core.host import ( from charmhelpers.core.host import (
CompareHostReleases, CompareHostReleases,
is_container,
lsb_release, lsb_release,
write_file, write_file,
) )
@ -157,6 +158,23 @@ class OVSPluginContext(context.NeutronContext):
neutron_api_settings = NeutronAPIContext()() neutron_api_settings = NeutronAPIContext()()
return neutron_api_settings['neutron_security_groups'] return neutron_api_settings['neutron_security_groups']
def disable_mlockall(self):
'''
Determine if Open vSwitch use of mlockall() should be disabled
If the disable-mlockall config option is unset, mlockall will be
disabled if running in a container and will default to enabled if
not running in a container.
'''
disable_mlockall = config('disable-mlockall')
if disable_mlockall is None:
disable_mlockall = False
if is_container():
disable_mlockall = True
cmp_release = CompareOpenStackReleases(
os_release('neutron-common', base='icehouse'))
return (cmp_release >= 'mitaka' and disable_mlockall)
def ovs_ctxt(self): def ovs_ctxt(self):
# In addition to generating config context, ensure the OVS service # In addition to generating config context, ensure the OVS service
# is running and the OVS bridge exists. Also need to ensure # is running and the OVS bridge exists. Also need to ensure
@ -203,6 +221,7 @@ class OVSPluginContext(context.NeutronContext):
ovs_ctxt['enable_dpdk'] = conf['enable-dpdk'] ovs_ctxt['enable_dpdk'] = conf['enable-dpdk']
ovs_ctxt['keepalived_healthcheck_interval'] = \ ovs_ctxt['keepalived_healthcheck_interval'] = \
conf['keepalived-healthcheck-interval'] conf['keepalived-healthcheck-interval']
ovs_ctxt['disable_mlockall'] = self.disable_mlockall()
net_dev_mtu = neutron_api_settings.get('network_device_mtu') net_dev_mtu = neutron_api_settings.get('network_device_mtu')
if net_dev_mtu: if net_dev_mtu:

View File

@ -196,6 +196,7 @@ BASE_RESOURCE_MAP = OrderedDict([
(OVS_DEFAULT, { (OVS_DEFAULT, {
'services': ['openvswitch-switch'], 'services': ['openvswitch-switch'],
'contexts': [neutron_ovs_context.OVSDPDKDeviceContext(), 'contexts': [neutron_ovs_context.OVSDPDKDeviceContext(),
neutron_ovs_context.OVSPluginContext(),
neutron_ovs_context.RemoteRestartContext( neutron_ovs_context.RemoteRestartContext(
['neutron-plugin', 'neutron-control'])], ['neutron-plugin', 'neutron-control'])],
}), }),
@ -423,9 +424,6 @@ def resource_map():
) )
if not use_dpdk(): if not use_dpdk():
drop_config.append(DPDK_INTERFACES) drop_config.append(DPDK_INTERFACES)
drop_config.append(OVS_DEFAULT)
elif ovs_has_late_dpdk_init():
drop_config.append(OVS_DEFAULT)
else: else:
drop_config.extend([OVS_CONF, DPDK_INTERFACES]) drop_config.extend([OVS_CONF, DPDK_INTERFACES])
@ -464,15 +462,18 @@ def restart_map():
return {k: v['services'] for k, v in resource_map().items()} return {k: v['services'] for k, v in resource_map().items()}
def services(): def services(exclude_services=None):
"""Returns a list of (unique) services associate with this charm """Returns a list of (unique) services associate with this charm
Note that we drop the os-charm-phy-nic-mtu service as it's not an actual Note that we drop the os-charm-phy-nic-mtu service as it's not an actual
running service that we can check for. running service that we can check for.
@returns [strings] - list of service names suitable for (re)start_service() @returns [strings] - list of service names suitable for (re)start_service()
""" """
if exclude_services is None:
exclude_services = []
s_set = set(chain(*restart_map().values())) s_set = set(chain(*restart_map().values()))
s_set.discard('os-charm-phy-nic-mtu') s_set.discard('os-charm-phy-nic-mtu')
s_set = {s for s in s_set if s not in exclude_services}
return list(s_set) return list(s_set)
@ -1024,11 +1025,14 @@ def assess_status(configs):
@param configs: a templating.OSConfigRenderer() object @param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect @returns None - this function is executed for its side-effect
""" """
assess_status_func(configs)() exclude_services = []
if is_unit_paused_set():
exclude_services = ['openvswitch-switch']
assess_status_func(configs, exclude_services)()
os_application_version_set(VERSION_PACKAGE) os_application_version_set(VERSION_PACKAGE)
def assess_status_func(configs): def assess_status_func(configs, exclude_services=None):
"""Helper function to create the function that will assess_status() for """Helper function to create the function that will assess_status() for
the unit. the unit.
Uses charmhelpers.contrib.openstack.utils.make_assess_status_func() to Uses charmhelpers.contrib.openstack.utils.make_assess_status_func() to
@ -1045,36 +1049,42 @@ def assess_status_func(configs):
@param configs: a templating.OSConfigRenderer() object @param configs: a templating.OSConfigRenderer() object
@return f() -> None : a function that assesses the unit's workload status @return f() -> None : a function that assesses the unit's workload status
""" """
if exclude_services is None:
exclude_services = []
required_interfaces = REQUIRED_INTERFACES.copy() required_interfaces = REQUIRED_INTERFACES.copy()
if enable_nova_metadata(): if enable_nova_metadata():
required_interfaces['neutron-plugin-api'] = ['neutron-plugin-api'] required_interfaces['neutron-plugin-api'] = ['neutron-plugin-api']
return make_assess_status_func( return make_assess_status_func(
configs, required_interfaces, configs, required_interfaces,
charm_func=validate_ovs_use_veth, charm_func=validate_ovs_use_veth,
services=services(), ports=None) services=services(exclude_services), ports=None)
def pause_unit_helper(configs): def pause_unit_helper(configs, exclude_services=None):
"""Helper function to pause a unit, and then call assess_status(...) in """Helper function to pause a unit, and then call assess_status(...) in
effect, so that the status is correctly updated. effect, so that the status is correctly updated.
Uses charmhelpers.contrib.openstack.utils.pause_unit() to do the work. Uses charmhelpers.contrib.openstack.utils.pause_unit() to do the work.
@param configs: a templating.OSConfigRenderer() object @param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect @returns None - this function is executed for its side-effect
""" """
_pause_resume_helper(pause_unit, configs) if exclude_services is None:
exclude_services = []
_pause_resume_helper(pause_unit, configs, exclude_services)
def resume_unit_helper(configs): def resume_unit_helper(configs, exclude_services=None):
"""Helper function to resume a unit, and then call assess_status(...) in """Helper function to resume a unit, and then call assess_status(...) in
effect, so that the status is correctly updated. effect, so that the status is correctly updated.
Uses charmhelpers.contrib.openstack.utils.resume_unit() to do the work. Uses charmhelpers.contrib.openstack.utils.resume_unit() to do the work.
@param configs: a templating.OSConfigRenderer() object @param configs: a templating.OSConfigRenderer() object
@returns None - this function is executed for its side-effect @returns None - this function is executed for its side-effect
""" """
_pause_resume_helper(resume_unit, configs) if exclude_services is None:
exclude_services = []
_pause_resume_helper(resume_unit, configs, exclude_services)
def _pause_resume_helper(f, configs): def _pause_resume_helper(f, configs, exclude_services=None):
"""Helper function that uses the make_assess_status_func(...) from """Helper function that uses the make_assess_status_func(...) from
charmhelpers.contrib.openstack.utils to create an assess_status(...) charmhelpers.contrib.openstack.utils to create an assess_status(...)
function that can be used with the pause/resume of the unit function that can be used with the pause/resume of the unit
@ -1083,8 +1093,10 @@ def _pause_resume_helper(f, configs):
""" """
# TODO(ajkavanagh) - ports= has been left off because of the race hazard # TODO(ajkavanagh) - ports= has been left off because of the race hazard
# that exists due to service_start() # that exists due to service_start()
f(assess_status_func(configs), if exclude_services is None:
services=services(), exclude_services = []
f(assess_status_func(configs, exclude_services),
services=services(exclude_services),
ports=None) ports=None)

View File

@ -9,3 +9,6 @@
{% if dpdk_enabled -%} {% if dpdk_enabled -%}
DPDK_OPTS='--dpdk -c {{ cpu_mask }} -n 4 --socket-mem {{ socket_memory }} {{ device_whitelist }} --vhost-owner libvirt-qemu:kvm --vhost-perm 0660' DPDK_OPTS='--dpdk -c {{ cpu_mask }} -n 4 --socket-mem {{ socket_memory }} {{ device_whitelist }} --vhost-owner libvirt-qemu:kvm --vhost-perm 0660'
{% endif -%} {% endif -%}
{% if disable_mlockall -%}
OVS_CTL_OPTS='--no-mlockall'
{% endif -%}

View File

@ -135,7 +135,8 @@ class OVSPluginContextTest(CharmTestCase):
'security-group-log-rate-limit': None, 'security-group-log-rate-limit': None,
'security-group-log-burst-limit': 25, 'security-group-log-burst-limit': 25,
'keepalived-healthcheck-interval': 0, 'keepalived-healthcheck-interval': 0,
'of-inactivity-probe': 10} 'of-inactivity-probe': 10,
'disable-mlockall': False}
def mock_config(key=None): def mock_config(key=None):
if key: if key:
@ -195,6 +196,7 @@ class OVSPluginContextTest(CharmTestCase):
'nsg_log_burst_limit': 25, 'nsg_log_burst_limit': 25,
'keepalived_healthcheck_interval': 0, 'keepalived_healthcheck_interval': 0,
'of_inactivity_probe': 10, 'of_inactivity_probe': 10,
'disable_mlockall': False,
} }
self.assertEqual(expect, napi_ctxt()) self.assertEqual(expect, napi_ctxt())
@ -277,10 +279,39 @@ class OVSPluginContextTest(CharmTestCase):
'nsg_log_burst_limit': 25, 'nsg_log_burst_limit': 25,
'keepalived_healthcheck_interval': 30, 'keepalived_healthcheck_interval': 30,
'of_inactivity_probe': 10, 'of_inactivity_probe': 10,
'disable_mlockall': False,
} }
self.maxDiff = None self.maxDiff = None
self.assertEqual(expect, napi_ctxt()) self.assertEqual(expect, napi_ctxt())
@patch.object(context, 'is_container')
@patch.object(context, 'os_release')
def test_disable_mlockall(self, _os_release, _is_container):
_os_release.return_value = 'victoria'
_is_container.return_value = True
ovsp_ctxt = context.OVSPluginContext()
self.assertTrue(ovsp_ctxt.disable_mlockall())
_is_container.return_value = False
ovsp_ctxt = context.OVSPluginContext()
self.assertFalse(ovsp_ctxt.disable_mlockall())
_os_release.return_value = 'liberty'
ovsp_ctxt = context.OVSPluginContext()
self.test_config.set('disable-mlockall', True)
self.assertFalse(ovsp_ctxt.disable_mlockall())
_os_release.return_value = 'mitaka'
ovsp_ctxt = context.OVSPluginContext()
self.test_config.set('disable-mlockall', True)
self.assertTrue(ovsp_ctxt.disable_mlockall())
_os_release.return_value = 'victoria'
ovsp_ctxt = context.OVSPluginContext()
self.test_config.set('disable-mlockall', False)
self.assertFalse(ovsp_ctxt.disable_mlockall())
class ZoneContextTest(CharmTestCase): class ZoneContextTest(CharmTestCase):

View File

@ -458,6 +458,7 @@ class TestNeutronOVSUtils(CharmTestCase):
_regconfs = nutils.register_configs() _regconfs = nutils.register_configs()
confs = ['/etc/neutron/neutron.conf', confs = ['/etc/neutron/neutron.conf',
'/etc/neutron/plugins/ml2/openvswitch_agent.ini', '/etc/neutron/plugins/ml2/openvswitch_agent.ini',
'/etc/default/openvswitch-switch',
'/etc/init/os-charm-phy-nic-mtu.conf'] '/etc/init/os-charm-phy-nic-mtu.conf']
self.assertEqual(_regconfs.configs, confs) self.assertEqual(_regconfs.configs, confs)
@ -582,11 +583,12 @@ class TestNeutronOVSUtils(CharmTestCase):
expect = OrderedDict([ expect = OrderedDict([
(nutils.NEUTRON_CONF, ['neutron-openvswitch-agent']), (nutils.NEUTRON_CONF, ['neutron-openvswitch-agent']),
(ML2CONF, ['neutron-openvswitch-agent']), (ML2CONF, ['neutron-openvswitch-agent']),
(nutils.OVS_DEFAULT, ['openvswitch-switch']),
]) ])
for item in _restart_map: for item in _restart_map:
self.assertTrue(item in _restart_map) self.assertTrue(item in _restart_map)
self.assertTrue(expect[item] == _restart_map[item]) self.assertTrue(expect[item] == _restart_map[item])
self.assertEqual(len(_restart_map.keys()), 2) self.assertEqual(len(_restart_map.keys()), 3)
@patch('charmhelpers.contrib.openstack.context.list_nics', @patch('charmhelpers.contrib.openstack.context.list_nics',
return_value=['eth0']) return_value=['eth0'])
@ -869,7 +871,7 @@ class TestNeutronOVSUtils(CharmTestCase):
callee = MagicMock() callee = MagicMock()
asf.return_value = callee asf.return_value = callee
nutils.assess_status('test-config') nutils.assess_status('test-config')
asf.assert_called_once_with('test-config') asf.assert_called_once_with('test-config', ['openvswitch-switch'])
callee.assert_called_once_with() callee.assert_called_once_with()
self.os_application_version_set.assert_called_with( self.os_application_version_set.assert_called_with(
nutils.VERSION_PACKAGE nutils.VERSION_PACKAGE
@ -902,10 +904,12 @@ class TestNeutronOVSUtils(CharmTestCase):
def test_pause_unit_helper(self): def test_pause_unit_helper(self):
with patch.object(nutils, '_pause_resume_helper') as prh: with patch.object(nutils, '_pause_resume_helper') as prh:
nutils.pause_unit_helper('random-config') nutils.pause_unit_helper('random-config')
prh.assert_called_once_with(nutils.pause_unit, 'random-config') prh.assert_called_once_with(nutils.pause_unit,
'random-config', [])
with patch.object(nutils, '_pause_resume_helper') as prh: with patch.object(nutils, '_pause_resume_helper') as prh:
nutils.resume_unit_helper('random-config') nutils.resume_unit_helper('random-config')
prh.assert_called_once_with(nutils.resume_unit, 'random-config') prh.assert_called_once_with(nutils.resume_unit,
'random-config', [])
@patch.object(nutils, 'services') @patch.object(nutils, 'services')
@patch.object(nutils, 'determine_ports') @patch.object(nutils, 'determine_ports')
@ -916,7 +920,7 @@ class TestNeutronOVSUtils(CharmTestCase):
with patch.object(nutils, 'assess_status_func') as asf: with patch.object(nutils, 'assess_status_func') as asf:
asf.return_value = 'assessor' asf.return_value = 'assessor'
nutils._pause_resume_helper(f, 'some-config') nutils._pause_resume_helper(f, 'some-config')
asf.assert_called_once_with('some-config') asf.assert_called_once_with('some-config', [])
# ports=None whilst port checks are disabled. # ports=None whilst port checks are disabled.
f.assert_called_once_with('assessor', services='s1', ports=None) f.assert_called_once_with('assessor', services='s1', ports=None)

View File

@ -30,7 +30,8 @@ class PauseTestCase(CharmTestCase):
def test_pauses_services(self): def test_pauses_services(self):
actions.pause([]) actions.pause([])
self.pause_unit_helper.assert_called_once_with('test-config') self.pause_unit_helper.assert_called_once_with(
'test-config', exclude_services=['openvswitch-switch'])
class ResumeTestCase(CharmTestCase): class ResumeTestCase(CharmTestCase):
@ -41,7 +42,8 @@ class ResumeTestCase(CharmTestCase):
def test_pauses_services(self): def test_pauses_services(self):
actions.resume([]) actions.resume([])
self.resume_unit_helper.assert_called_once_with('test-config') self.resume_unit_helper.assert_called_once_with(
'test-config', exclude_services=['openvswitch-switch'])
class MainTestCase(CharmTestCase): class MainTestCase(CharmTestCase):