Merge "Use long_rpc_timeout in select_destinations RPC call" into stable/rocky

2018-12-09 05:07:50 +00:00 · 2018-12-09 05:07:50 +00:00 · 283b959207
parent 0880b0d927 4b5a21b4eb
commit 283b959207
7 changed files with 20 additions and 50 deletions
--- a/nova/conf/rpc.py
+++ b/nova/conf/rpc.py
@@ -27,6 +27,7 @@ instead of the global rpc_response_timeout value.
 Operations with RPC calls that utilize this value:

 * live migration
+* scheduling

 Related options:

--- a/nova/scheduler/client/__init__.py
+++ b/nova/scheduler/client/__init__.py
@@ -17,8 +17,6 @@ import functools

 from oslo_utils import importutils

-from nova.scheduler import utils
-

 class LazyLoader(object):

@@ -46,7 +44,6 @@ class SchedulerClient(object):
        self.reportclient = LazyLoader(importutils.import_class(
            'nova.scheduler.client.report.SchedulerReportClient'))

-    @utils.retry_select_destinations
    def select_destinations(self, context, spec_obj, instance_uuids,
            return_objects=False, return_alternates=False):
        return self.queryclient.select_destinations(context, spec_obj,
--- a/nova/scheduler/rpcapi.py
+++ b/nova/scheduler/rpcapi.py
@@ -154,7 +154,9 @@ class SchedulerAPI(object):
            msg_args['filter_properties'
                     ] = spec_obj.to_legacy_filter_properties_dict()
            version = '4.0'
-        cctxt = self.client.prepare(version=version)
+        cctxt = self.client.prepare(
+            version=version, call_monitor_timeout=CONF.rpc_response_timeout,
+            timeout=CONF.long_rpc_timeout)
        return cctxt.call(ctxt, 'select_destinations', **msg_args)

    def update_aggregates(self, ctxt, aggregates):
--- a/nova/scheduler/utils.py
+++ b/nova/scheduler/utils.py
@@ -15,12 +15,10 @@
 """Utility methods for scheduling."""

 import collections
-import functools
 import re
 import sys

 from oslo_log import log as logging
-import oslo_messaging as messaging
 from oslo_serialization import jsonutils
 from six.moves.urllib import parse

@@ -890,37 +888,6 @@ def setup_instance_group(context, request_spec):
        request_spec.instance_group.members = group_info.members


-def retry_on_timeout(retries=1):
-    """Retry the call in case a MessagingTimeout is raised.
-
-    A decorator for retrying calls when a service dies mid-request.
-
-    :param retries: Number of retries
-    :returns: Decorator
-    """
-    def outer(func):
-        @functools.wraps(func)
-        def wrapped(*args, **kwargs):
-            attempt = 0
-            while True:
-                try:
-                    return func(*args, **kwargs)
-                except messaging.MessagingTimeout:
-                    attempt += 1
-                    if attempt <= retries:
-                        LOG.warning(
-                            "Retrying %(name)s after a MessagingTimeout, "
-                            "attempt %(attempt)s of %(retries)s.",
-                            {'attempt': attempt, 'retries': retries,
-                             'name': func.__name__})
-                    else:
-                        raise
-        return wrapped
-    return outer
-
-retry_select_destinations = retry_on_timeout(CONF.scheduler.max_attempts - 1)
-
-
 def request_is_rebuild(spec_obj):
    """Returns True if request is for a rebuild.

--- a/nova/tests/unit/scheduler/test_client.py
+++ b/nova/tests/unit/scheduler/test_client.py
@@ -60,19 +60,7 @@ class SchedulerClientTestCase(test.NoDBTestCase):
                False]
        self.assertRaises(messaging.MessagingTimeout,
                          self.client.select_destinations, *fake_args)
-        mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2)
-
-    @mock.patch.object(scheduler_query_client.SchedulerQueryClient,
-                       'select_destinations', side_effect=[
-                           messaging.MessagingTimeout(), mock.DEFAULT])
-    def test_select_destinations_timeout_once(self, mock_select_destinations):
-        # scenario: the scheduler service times out & recovers after failure
-        fake_spec = objects.RequestSpec()
-        fake_spec.instance_uuid = uuids.instance
-        fake_args = ['ctxt', fake_spec, [fake_spec.instance_uuid], False,
-                False]
-        self.client.select_destinations(*fake_args)
-        mock_select_destinations.assert_has_calls([mock.call(*fake_args)] * 2)
+        mock_select_destinations.assert_called_once_with(*fake_args)

    @mock.patch.object(scheduler_query_client.SchedulerQueryClient,
                       'update_aggregates')
--- a/nova/tests/unit/scheduler/test_rpcapi.py
+++ b/nova/tests/unit/scheduler/test_rpcapi.py
@@ -18,6 +18,7 @@ Unit Tests for nova.scheduler.rpcapi

 import mock

+from nova import conf
 from nova import context
 from nova import exception as exc
 from nova import objects
@@ -25,6 +26,8 @@ from nova.scheduler import rpcapi as scheduler_rpcapi
 from nova import test
 from nova.tests import uuidsentinel as uuids

+CONF = conf.CONF
+

 class SchedulerRpcAPITestCase(test.NoDBTestCase):
    def _test_scheduler_api(self, method, rpc_method, expected_args=None,
@@ -45,6 +48,11 @@ class SchedulerRpcAPITestCase(test.NoDBTestCase):
            expected_kwargs = expected_args

        prepare_kwargs = {}
+        if method == 'select_destinations':
+            prepare_kwargs.update({
+                'call_monitor_timeout': CONF.rpc_response_timeout,
+                'timeout': CONF.long_rpc_timeout
+            })
        if expected_fanout:
            prepare_kwargs['fanout'] = True
        if expected_version:
--- a/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml
+++ b/releasenotes/notes/bug-1795992-long_rpc_timeout-select_destinations-9712e8690160928f.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    The ``long_rpc_timeout`` configuration option is now used for the RPC
+    call to the scheduler to select a host. This is in order to avoid a
+    timeout when scheduling multiple servers in a single request and/or when
+    the scheduler needs to process a large number of hosts.