0389c7fbb1
Recently, identity (keystone) sync functionality was added to the dcorch. This changed the behaviour of the update_subcloud_states RPC. The dcmanager expects this RPC to be handled quickly and a reply sent almost immediately (timeout is 60s). Instead, the dcorch is now performing an identity sync when handling this RPC, which involves sending multiple messages to a subcloud and waiting for replies. This causes the update_subcloud_states RPC to time out sometimes (especially if a subcloud is unreachable) and the dcmanager/dcorch states to get out of sync, with no recovery mechanism in place. To fix this, I have created a new initial sync manager in the dcorch. When the dcorch handles the update_subcloud_states RPC, it will now just update the subcloud to indicate that an initial sync is required and then reply to the RPC immediately. The initial sync manager will perform the initial sync in the background (separate greenthreads) and enable the subcloud when it has completed. I also enhanced the dcmanager subcloud audit to periodically send a state update for each subcloud to the dcorch, which will correct any state mismatches that might occur. Change-Id: I70b98d432c3ed56b9532117f69f02d4a0cff5742 Closes-Bug: 1860999 Closes-Bug: 1861157 Signed-off-by: Bart Wensley <barton.wensley@windriver.com>
158 lines
6.5 KiB
Python
158 lines
6.5 KiB
Python
# Copyright 2017 Ericsson AB.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
# Copyright (c) 2020 Wind River Systems, Inc.
|
|
#
|
|
|
|
from oslo_log import log as logging
|
|
|
|
from dcorch.common import exceptions
|
|
from dcorch.engine.subcloud import SubCloudEngine
|
|
from dcorch.objects import subcloud
|
|
|
|
# Module-level logger, named after this module's import path.
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class GenericSyncManager(object):
    """Manages tasks related to resource management.

    Keeps one engine object per subcloud in ``self.subcloud_engines``
    (keyed by subcloud/region name) and forwards each request to the
    matching engine.
    """

    def __init__(self, *args, **kwargs):
        """Create a manager with no subcloud engines registered.

        Positional and keyword arguments are accepted but not used.
        """
        super(GenericSyncManager, self).__init__()
        self.subcloud_engines = {}

    def _get_subcloud_engine(self, subcloud_name):
        """Return the engine registered for ``subcloud_name``.

        Only the dictionary lookup is guarded, so a KeyError raised later
        from inside an engine method is never mistaken for a missing
        subcloud.

        :raises exceptions.SubcloudNotFound: no engine is registered under
            ``subcloud_name``.
        """
        try:
            return self.subcloud_engines[subcloud_name]
        except KeyError:
            raise exceptions.SubcloudNotFound(region_name=subcloud_name)

    def init_from_db(self, context):
        """Build an engine for every subcloud returned by the database.

        Each subcloud from ``SubcloudList.get_all(context)`` gets a
        ``SubCloudEngine``, which is registered under the subcloud's
        ``region_name`` and then has ``spawn_sync_threads()`` called on it.
        """
        subclouds = subcloud.SubcloudList.get_all(context)
        for sc in subclouds:
            engine = SubCloudEngine(subcloud=sc)
            LOG.info('loading subcloud %(sc)s', {'sc': sc.region_name})
            self.subcloud_engines[sc.region_name] = engine
            engine.spawn_sync_threads()

    def add_subcloud(self, context, name, version):
        """Create, register and start the engine for a new subcloud.

        An existing entry with the same ``name`` is replaced.
        """
        LOG.info('adding subcloud %(sc)s', {'sc': name})
        subcloud_engine = SubCloudEngine(
            context=context, name=name, version=version)
        self.subcloud_engines[name] = subcloud_engine
        subcloud_engine.spawn_sync_threads()

    def del_subcloud(self, context, subcloud_name):
        """Delete a subcloud's engine and unregister it.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('deleting subcloud %(sc)s', {'sc': subcloud_name})
        subcloud_engine.delete()
        # pop() rather than del: the engine has already been deleted, so an
        # entry that is no longer present at this point is not an error.
        self.subcloud_engines.pop(subcloud_name, None)

    def sync_request(self, ctxt, endpoint_type):
        """Wake every registered engine for ``endpoint_type``.

        :param ctxt: caller-supplied context; not used by this method.
        """
        # Someone has enqueued a sync job. Wake the subcloud engines.
        for subcloud_engine in self.subcloud_engines.values():
            subcloud_engine.wake(endpoint_type)

    def subcloud_state_matches(self, subcloud_name,
                               management_state=None,
                               availability_status=None,
                               initial_sync_state=None):
        """Return the engine's ``state_matches()`` result for the subcloud.

        The three state arguments are passed through unchanged as keyword
        arguments.

        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        return subcloud_engine.state_matches(
            management_state=management_state,
            availability_status=availability_status,
            initial_sync_state=initial_sync_state)

    def update_subcloud_state(self, subcloud_name,
                              management_state=None,
                              availability_status=None,
                              initial_sync_state=None):
        """Forward a state update to the subcloud's engine.

        The three state arguments are logged and then passed through
        unchanged to the engine's ``update_state()``.

        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('updating state for subcloud %(sc)s - '
                 'management_state: %(mgmt)s '
                 'availability_status: %(avail)s '
                 'initial_sync_state: %(iss)s',
                 {'sc': subcloud_name, 'mgmt': management_state,
                  'avail': availability_status, 'iss': initial_sync_state})
        subcloud_engine.update_state(
            management_state=management_state,
            availability_status=availability_status,
            initial_sync_state=initial_sync_state)

    def enable_subcloud(self, context, subcloud_name):
        """Call ``enable()`` on the subcloud's engine.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('enabling subcloud %(sc)s', {'sc': subcloud_name})
        subcloud_engine.enable()

    def disable_subcloud(self, context, subcloud_name):
        """Call ``disable()`` on the subcloud's engine.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('disabling subcloud %(sc)s', {'sc': subcloud_name})
        subcloud_engine.disable()

    def add_subcloud_sync_endpoint_type(self, context, subcloud_name,
                                        endpoint_type_list=None):
        """Add sync endpoint types to the subcloud's engine.

        If the engine's ``add_sync_endpoint_type()`` raises, the same list
        is passed to ``remove_sync_endpoint_type()`` to undo any partial
        work and the original exception is re-raised.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)

        LOG.info('adding sync endpoint type for subcloud %(sc)s',
                 {'sc': subcloud_name})
        try:
            subcloud_engine.add_sync_endpoint_type(endpoint_type_list)
        except Exception:
            subcloud_engine.remove_sync_endpoint_type(endpoint_type_list)
            raise

    def remove_subcloud_sync_endpoint_type(self, context, subcloud_name,
                                           endpoint_type_list=None):
        """Remove sync endpoint types from the subcloud's engine.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('removing sync endpoint type for subcloud %(sc)s',
                 {'sc': subcloud_name})
        subcloud_engine.remove_sync_endpoint_type(endpoint_type_list)

    def update_subcloud_version(self, context, subcloud_name, sw_version):
        """Pass ``sw_version`` to the engine's ``set_version()``.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('updating subcloud %(sc)s version to %(ver)s',
                 {'sc': subcloud_name, 'ver': sw_version})
        subcloud_engine.set_version(sw_version)

    def initial_sync(self, context, subcloud_name):
        """Call ``initial_sync()`` on the subcloud's engine.

        :param context: caller-supplied context; not used by this method.
        :raises exceptions.SubcloudNotFound: unknown ``subcloud_name``.
        """
        subcloud_engine = self._get_subcloud_engine(subcloud_name)
        LOG.info('Initial sync subcloud %(sc)s', {'sc': subcloud_name})
        subcloud_engine.initial_sync()

    def run_sync_audit(self):
        """Call ``run_sync_audit()`` on every registered engine."""
        for subcloud_engine in self.subcloud_engines.values():
            subcloud_engine.run_sync_audit()
|