diff --git a/doc/source/admin/config-wsgi.rst b/doc/source/admin/config-wsgi.rst index 57ed576f747..df8b42f70ad 100644 --- a/doc/source/admin/config-wsgi.rst +++ b/doc/source/admin/config-wsgi.rst @@ -127,3 +127,25 @@ serve this job: # /usr/bin/neutron-rpc-server --config-file /etc/neutron/neutron.conf --config-file /etc/neutron/plugins/ml2/ml2_conf.ini .. end + +Neutron Worker Processes +------------------------ + +Neutron will attempt to spawn a number of child processes for handling API +and RPC requests. The number of API workers is set to the number of CPU +cores, further limited by available memory, and the number of RPC workers +is set to half that number. + +It is strongly recommended that all deployers set these values themselves, +via the api_workers and rpc_workers configuration parameters. + +For a cloud with a high load to a relatively small number of objects, +a smaller value for api_workers will provide better performance than +many (somewhere around 4-8.) For a cloud with a high load to lots of +different objects, then the more the better. Budget neutron-server +using about 2GB of RAM in steady-state. + +For rpc_workers, there needs to be enough to keep up with incoming +events from the various neutron agents. Signs that there are too few +can be agent heartbeats arriving late, nova vif bindings timing out +on the hypervisors, or rpc message timeout exceptions in agent logs. diff --git a/neutron/cmd/status.py b/neutron/cmd/status.py index 5e1c97c7580..bd3c0610cec 100644 --- a/neutron/cmd/status.py +++ b/neutron/cmd/status.py @@ -18,6 +18,7 @@ from oslo_log import log as logging from oslo_upgradecheck import upgradecheck from neutron.conf import common as neutron_conf_base +from neutron.conf import service as neutron_conf_service CHECKS_ENTRYPOINTS = 'neutron.status.upgrade.checks' LOG = logging.getLogger(__name__) @@ -50,6 +51,8 @@ def setup_conf(conf=cfg.CONF): """ neutron_conf_base.register_core_common_config_opts(conf) + neutron_conf_service.register_service_opts( + neutron_conf_service.service_opts, cfg.CONF) return conf diff --git a/neutron/cmd/upgrade_checks/checks.py b/neutron/cmd/upgrade_checks/checks.py index 2cbc3750f31..f1b8cb3efe6 100644 --- a/neutron/cmd/upgrade_checks/checks.py +++ b/neutron/cmd/upgrade_checks/checks.py @@ -12,6 +12,7 @@ # License for the specific language governing permissions and limitations # under the License. +from oslo_config import cfg from oslo_upgradecheck import upgradecheck from neutron._i18n import _ @@ -22,12 +23,19 @@ class CoreChecks(base.BaseChecks): def get_checks(self): return [ - (_("Check nothing"), self.noop_check) + (_("Worker counts configured"), self.worker_count_check) ] @staticmethod - def noop_check(checker): - # NOTE(slaweq) This is only example Noop check, it can be removed when - # some real check methods will be added - return upgradecheck.Result( - upgradecheck.Code.SUCCESS, _("Always succeed (placeholder)")) + def worker_count_check(checker): + + if cfg.CONF.api_workers and cfg.CONF.rpc_workers: + return upgradecheck.Result( + upgradecheck.Code.SUCCESS, _("Number of workers already " + "defined in config")) + else: + return upgradecheck.Result( + upgradecheck.Code.WARNING, _("The default number of workers " + "has changed. Please see release notes for the new values, " + "but it is strongly encouraged for deployers to manually set " + "the values for api_workers and rpc_workers.")) diff --git a/neutron/conf/service.py b/neutron/conf/service.py index d68e0229b5f..b652ec4e28b 100644 --- a/neutron/conf/service.py +++ b/neutron/conf/service.py @@ -26,10 +26,12 @@ service_opts = [ cfg.IntOpt('api_workers', help=_('Number of separate API worker processes for service. ' 'If not specified, the default is equal to the number ' - 'of CPUs available for best performance.')), + 'of CPUs available for best performance, capped by ' + 'potential RAM usage.')), cfg.IntOpt('rpc_workers', - default=1, - help=_('Number of RPC worker processes for service.')), + help=_('Number of RPC worker processes for service. ' + 'If not specified, the default is equal to half the ' + 'number of API workers.')), cfg.IntOpt('rpc_state_report_workers', default=1, help=_('Number of RPC worker processes dedicated to state ' diff --git a/neutron/service.py b/neutron/service.py index 50a859c5475..23c9162d776 100644 --- a/neutron/service.py +++ b/neutron/service.py @@ -33,6 +33,7 @@ from oslo_service import loopingcall from oslo_service import service as common_service from oslo_utils import excutils from oslo_utils import importutils +import psutil from neutron.common import config from neutron.common import profiler @@ -148,28 +149,51 @@ class RpcReportsWorker(RpcWorker): start_listeners_method = 'start_rpc_state_reports_listener' -def _get_rpc_workers(): - plugin = directory.get_plugin() +def _get_worker_count(): + # Start with the number of CPUs + num_workers = processutils.get_worker_count() + + # Now don't use more than half the system memory, assuming + # a steady-state bloat of around 2GB. + mem = psutil.virtual_memory() + mem_workers = int(mem.total / (2 * 1024 * 1024 * 1024)) + if mem_workers < num_workers: + num_workers = mem_workers + + # And just in case, always at least one. + if num_workers <= 0: + num_workers = 1 + + return num_workers + + +def _get_rpc_workers(plugin=None): + if plugin is None: + plugin = directory.get_plugin() service_plugins = directory.get_plugins().values() - if cfg.CONF.rpc_workers < 1: - cfg.CONF.set_override('rpc_workers', 1) + workers = cfg.CONF.rpc_workers + if workers is None: + # By default, half as many rpc workers as api workers + workers = int(_get_worker_count() / 2) + if workers < 1: + workers = 1 - # If 0 < rpc_workers then start_rpc_listeners would be called in a + # If workers > 0 then start_rpc_listeners would be called in a # subprocess and we cannot simply catch the NotImplementedError. It is # simpler to check this up front by testing whether the plugin supports # multiple RPC workers. if not plugin.rpc_workers_supported(): LOG.debug("Active plugin doesn't implement start_rpc_listeners") - if 0 < cfg.CONF.rpc_workers: + if workers > 0: LOG.error("'rpc_workers = %d' ignored because " "start_rpc_listeners is not implemented.", - cfg.CONF.rpc_workers) + workers) raise NotImplementedError() # passing service plugins only, because core plugin is among them rpc_workers = [RpcWorker(service_plugins, - worker_process_count=cfg.CONF.rpc_workers)] + worker_process_count=workers)] if (cfg.CONF.rpc_state_report_workers > 0 and plugin.rpc_state_report_workers_supported()): @@ -283,7 +307,7 @@ def start_plugins_workers(): def _get_api_workers(): workers = cfg.CONF.api_workers if workers is None: - workers = processutils.get_worker_count() + workers = _get_worker_count() return workers diff --git a/neutron/tests/functional/test_service.py b/neutron/tests/functional/test_service.py index b436415587d..0991f378211 100644 --- a/neutron/tests/functional/test_service.py +++ b/neutron/tests/functional/test_service.py @@ -24,8 +24,9 @@ from neutron.tests.functional import test_server class TestService(base.BaseLoggingTestCase): def test_api_workers_default(self): - self.assertEqual(processutils.get_worker_count(), - neutron_service._get_api_workers()) + # This value may end being scaled downward based on available RAM. + self.assertGreaterEqual(processutils.get_worker_count(), + neutron_service._get_api_workers()) def test_api_workers_from_config(self): cfg.CONF.set_override('api_workers', 1234) diff --git a/neutron/tests/unit/cmd/upgrade_checks/test_checks.py b/neutron/tests/unit/cmd/upgrade_checks/test_checks.py index 8811685ea0d..8d2e2617188 100644 --- a/neutron/tests/unit/cmd/upgrade_checks/test_checks.py +++ b/neutron/tests/unit/cmd/upgrade_checks/test_checks.py @@ -13,6 +13,7 @@ # under the License. import mock +from oslo_config import cfg from oslo_upgradecheck.upgradecheck import Code from neutron.cmd.upgrade_checks import checks @@ -28,6 +29,26 @@ class TestChecks(base.BaseTestCase): def test_get_checks_list(self): self.assertIsInstance(self.checks.get_checks(), list) - def test_noop_check(self): - check_result = checks.CoreChecks.noop_check(mock.Mock()) - self.assertEqual(Code.SUCCESS, check_result.code) + def test_worker_check_good(self): + cfg.CONF.set_override("api_workers", 2) + cfg.CONF.set_override("rpc_workers", 2) + result = checks.CoreChecks.worker_count_check(mock.Mock()) + self.assertEqual(Code.SUCCESS, result.code) + + def test_worker_check_api_missing(self): + cfg.CONF.set_override("api_workers", None) + cfg.CONF.set_override("rpc_workers", 2) + result = checks.CoreChecks.worker_count_check(mock.Mock()) + self.assertEqual(Code.WARNING, result.code) + + def test_worker_check_rpc_missing(self): + cfg.CONF.set_override("api_workers", 2) + cfg.CONF.set_override("rpc_workers", None) + result = checks.CoreChecks.worker_count_check(mock.Mock()) + self.assertEqual(Code.WARNING, result.code) + + def test_worker_check_both_missing(self): + cfg.CONF.set_override("api_workers", None) + cfg.CONF.set_override("rpc_workers", None) + result = checks.CoreChecks.worker_count_check(mock.Mock()) + self.assertEqual(Code.WARNING, result.code) diff --git a/neutron/tests/unit/test_service.py b/neutron/tests/unit/test_service.py index de5c3507baa..66a41f8ffa3 100644 --- a/neutron/tests/unit/test_service.py +++ b/neutron/tests/unit/test_service.py @@ -18,6 +18,7 @@ import mock from neutron_lib.callbacks import events from neutron_lib.callbacks import registry from neutron_lib.callbacks import resources +from oslo_concurrency import processutils from oslo_config import cfg from neutron import service @@ -25,6 +26,14 @@ from neutron.tests import base from neutron.tests.unit import test_wsgi +class TestServiceHelpers(base.BaseTestCase): + + def test_get_workers(self): + num_workers = service._get_worker_count() + self.assertGreaterEqual(num_workers, 1) + self.assertLessEqual(num_workers, processutils.get_worker_count()) + + class TestRpcWorker(test_wsgi.TestServiceBase): def test_reset(self): @@ -33,12 +42,36 @@ class TestRpcWorker(test_wsgi.TestServiceBase): self._test_reset(rpc_worker) +class TestRunRpcWorkers(base.BaseTestCase): + def setUp(self): + super(TestRunRpcWorkers, self).setUp() + self.worker_count = service._get_worker_count() + + def _test_rpc_workers(self, config_value, expected_passed_value): + if config_value is not None: + cfg.CONF.set_override('rpc_workers', config_value) + with mock.patch('neutron.service.RpcWorker') as mock_rpc_worker: + with mock.patch('neutron.service.RpcReportsWorker'): + service._get_rpc_workers(plugin=mock.Mock()) + init_call = mock_rpc_worker.call_args + expected_call = mock.call( + mock.ANY, worker_process_count=expected_passed_value) + self.assertEqual(expected_call, init_call) + + def test_rpc_workers_zero(self): + self._test_rpc_workers(0, 1) + + def test_rpc_workers_default(self): + self._test_rpc_workers(None, int(self.worker_count / 2)) + + def test_rpc_workers_defined(self): + self._test_rpc_workers(42, 42) + + class TestRunWsgiApp(base.BaseTestCase): def setUp(self): super(TestRunWsgiApp, self).setUp() - self.processor_count = mock.patch( - 'oslo_concurrency.processutils.get_worker_count' - ).start().return_value + self.worker_count = service._get_worker_count() def _test_api_workers(self, config_value, expected_passed_value): if config_value is not None: @@ -54,7 +87,7 @@ class TestRunWsgiApp(base.BaseTestCase): self._test_api_workers(0, 0) def test_api_workers_default(self): - self._test_api_workers(None, self.processor_count) + self._test_api_workers(None, self.worker_count) def test_api_workers_defined(self): self._test_api_workers(42, 42) diff --git a/releasenotes/notes/modify_api_rpc_worker_defaults-1acd62728b2b55fa.yaml b/releasenotes/notes/modify_api_rpc_worker_defaults-1acd62728b2b55fa.yaml new file mode 100644 index 00000000000..7caf692167f --- /dev/null +++ b/releasenotes/notes/modify_api_rpc_worker_defaults-1acd62728b2b55fa.yaml @@ -0,0 +1,15 @@ +upgrade: + - The number of api and rpc workers may change on upgrade. + It is strongly recommended that all deployers set these + values in their neutron configurations, rather than + using the defaults. +fixes: + - Neutron API workers default to the number of CPU cores. + This can lead to high cpu/low memory boxes getting into + trouble. The defaults have been tweaked to attempt to + put an upper bound on the default of either the number + of cores, or half of system memory, whichever is lower. + In addition, the default number of RPC workers has been + changed from a value of ``1``, to a value of half the + number of API workers. +