From 57f0ddbf9111ef2e3738af9c314150ed0f84f110 Mon Sep 17 00:00:00 2001 From: zhiyuan_cai Date: Sat, 28 Nov 2015 17:52:04 +0800 Subject: [PATCH] Proxy for compute Initial patch for compute proxy, now we can boot a vm and the RPC request will be captured by dispatcher then sent to proxy. Change-Id: I361d18e0b87ece3db113f6549f2b3c9bf50e1006 --- cmd/api.py | 0 cmd/dispatcher.py | 23 +- cmd/proxy.py | 85 +++ devstack/local.conf.sample | 5 +- devstack/plugin.sh | 99 ++-- devstack/settings | 20 +- tricircle/api/app.py | 4 +- tricircle/common/nova_lib.py | 65 ++ tricircle/common/service.py | 80 +++ tricircle/common/utils.py | 4 + tricircle/dispatcher/compute.py | 110 ---- tricircle/dispatcher/compute_manager.py | 126 ++++ tricircle/dispatcher/host_manager.py | 50 ++ tricircle/dispatcher/site_manager.py | 16 +- tricircle/networking/plugin.py | 2 +- tricircle/proxy/__init__.py | 0 tricircle/proxy/compute_manager.py | 751 ++++++++++++++++++++++++ tricircle/proxy/service.py | 42 ++ 18 files changed, 1301 insertions(+), 181 deletions(-) mode change 100755 => 100644 cmd/api.py create mode 100644 cmd/proxy.py create mode 100644 tricircle/common/nova_lib.py create mode 100644 tricircle/common/service.py delete mode 100644 tricircle/dispatcher/compute.py create mode 100644 tricircle/dispatcher/compute_manager.py create mode 100644 tricircle/dispatcher/host_manager.py create mode 100644 tricircle/proxy/__init__.py create mode 100644 tricircle/proxy/compute_manager.py create mode 100644 tricircle/proxy/service.py diff --git a/cmd/api.py b/cmd/api.py old mode 100755 new mode 100644 diff --git a/cmd/dispatcher.py b/cmd/dispatcher.py index 4834b8d8..1814de39 100644 --- a/cmd/dispatcher.py +++ b/cmd/dispatcher.py @@ -22,19 +22,18 @@ if __name__ == "__main__": import sys import traceback -from oslo_log import log as logging from oslo_config import cfg +from oslo_log import log as logging -from nova import exception as nova_exception -from nova import quota -import nova.db.api -from nova.conductor import rpcapi as conductor_rpcapi -from nova.i18n import _LE -import nova.objects as nova_objects -from nova.objects import base as objects_base -import nova.rpc as nova_rpc - -import tricircle.dispatcher.service as service +from tricircle.common.i18n import _LE +from tricircle.common.nova_lib import conductor_rpcapi +from tricircle.common.nova_lib import db_api as nova_db_api +from tricircle.common.nova_lib import exception as nova_exception +from tricircle.common.nova_lib import objects as nova_objects +from tricircle.common.nova_lib import objects_base +from tricircle.common.nova_lib import quota +from tricircle.common.nova_lib import rpc as nova_rpc +from tricircle.dispatcher import service def block_db_access(): @@ -49,7 +48,7 @@ def block_db_access(): stacktrace) raise nova_exception.DBNotAllowed('nova-compute') - nova.db.api.IMPL = NoDB() + nova_db_api.IMPL = NoDB() def set_up_nova_object_indirection(): diff --git a/cmd/proxy.py b/cmd/proxy.py new file mode 100644 index 00000000..70dd2a5d --- /dev/null +++ b/cmd/proxy.py @@ -0,0 +1,85 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import eventlet + +if __name__ == "__main__": + eventlet.monkey_patch() + +import sys +import traceback + +from oslo_config import cfg +from oslo_log import log as logging + +from tricircle.common.i18n import _LE +from tricircle.common.nova_lib import conductor_rpcapi +from tricircle.common.nova_lib import db_api as nova_db_api +from tricircle.common.nova_lib import exception as nova_exception +from tricircle.common.nova_lib import objects as nova_objects +from tricircle.common.nova_lib import objects_base +from tricircle.common.nova_lib import quota +from tricircle.common.nova_lib import rpc as nova_rpc +from tricircle.proxy import service + + +def block_db_access(): + class NoDB(object): + def __getattr__(self, attr): + return self + + def __call__(self, *args, **kwargs): + stacktrace = "".join(traceback.format_stack()) + LOG = logging.getLogger('nova.compute') + LOG.error(_LE('No db access allowed in nova-compute: %s'), + stacktrace) + raise nova_exception.DBNotAllowed('nova-compute') + + nova_db_api.IMPL = NoDB() + + +def set_up_nova_object_indirection(): + conductor = conductor_rpcapi.ConductorAPI() + conductor.client.target.exchange = "nova" + objects_base.NovaObject.indirection_api = conductor + + +def process_command_line_arguments(): + logging.register_options(cfg.CONF) + logging.set_defaults() + cfg.CONF(sys.argv[1:]) + logging.setup(cfg.CONF, "proxy", version='0.1') + + +def _set_up_nova_objects(): + nova_rpc.init(cfg.CONF) + block_db_access() + set_up_nova_object_indirection() + nova_objects.register_all() + + +def _disable_quotas(): + QUOTAS = quota.QUOTAS + QUOTAS._driver_cls = quota.NoopQuotaDriver() + + +if __name__ == "__main__": + _set_up_nova_objects() + _disable_quotas() + process_command_line_arguments() + server = service.setup_server() + server.start() + server.wait() diff --git a/devstack/local.conf.sample b/devstack/local.conf.sample index c4c33a16..df7765c4 100644 --- a/devstack/local.conf.sample +++ b/devstack/local.conf.sample @@ -32,8 +32,9 @@ Q_ENABLE_TRICIRCLE=True enable_plugin tricircle https://git.openstack.org/openstack/tricircle master # Tricircle Services -enable_service t-svc -enable_service t-svc-api +enable_service t-api +enable_service t-prx +enable_service t-dis # Use Neutron instead of nova-network disable_service n-net diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 63de91e3..4b37beed 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -3,7 +3,7 @@ # Test if any tricircle services are enabled # is_tricircle_enabled function is_tricircle_enabled { - [[ ,${ENABLED_SERVICES} =~ ,"t-svc-" ]] && return 0 + [[ ,${ENABLED_SERVICES} =~ ,"t-" ]] && return 0 return 1 } @@ -14,7 +14,7 @@ function is_tricircle_enabled { # $SERVICE_TENANT_NAME tricircle service function create_tricircle_accounts { - if [[ "$ENABLED_SERVICES" =~ "t-svc-api" ]]; then + if [[ "$ENABLED_SERVICES" =~ "t-api" ]]; then create_service_user "tricircle" if [[ "$KEYSTONE_CATALOG_BACKEND" = 'sql' ]]; then @@ -22,9 +22,9 @@ function create_tricircle_accounts { "Cascading" "OpenStack Cascading Service") get_or_create_endpoint $tricircle_dispatcher \ "$REGION_NAME" \ - "$SERVICE_PROTOCOL://$TRICIRCLE_CASCADE_API_HOST:$TRICIRCLE_CASCADE_API_PORT/v1.0" \ - "$SERVICE_PROTOCOL://$TRICIRCLE_CASCADE_API_HOST:$TRICIRCLE_CASCADE_API_PORT/v1.0" \ - "$SERVICE_PROTOCOL://$TRICIRCLE_CASCADE_API_HOST:$TRICIRCLE_CASCADE_API_PORT/v1.0" + "$SERVICE_PROTOCOL://$TRICIRCLE_API_HOST:$TRICIRCLE_API_PORT/v1.0" \ + "$SERVICE_PROTOCOL://$TRICIRCLE_API_HOST:$TRICIRCLE_API_PORT/v1.0" \ + "$SERVICE_PROTOCOL://$TRICIRCLE_API_HOST:$TRICIRCLE_API_PORT/v1.0" fi fi } @@ -40,10 +40,9 @@ function create_tricircle_cache_dir { } -function configure_tricircle_plugin { - echo "Configuring Neutron for Tricircle" - +function configure_tricircle_dispatcher { if is_service_enabled q-svc ; then + echo "Configuring Neutron plugin for Tricircle" Q_PLUGIN_CLASS="tricircle.networking.plugin.TricirclePlugin" #NEUTRON_CONF=/etc/neutron/neutron.conf @@ -51,8 +50,8 @@ function configure_tricircle_plugin { iniset $NEUTRON_CONF DEFAULT service_plugins "" fi - if is_service_enabled t-svc ; then - echo "Configuring Neutron for Tricircle Cascade Service" + if is_service_enabled t-dis ; then + echo "Configuring Tricircle Dispatcher" sudo install -d -o $STACK_USER -m 755 $TRICIRCLE_CONF_DIR cp -p $TRICIRCLE_DIR/etc/dispatcher.conf $TRICIRCLE_DISPATCHER_CONF @@ -66,37 +65,52 @@ function configure_tricircle_plugin { iniset $TRICIRCLE_DISPATCHER_CONF DEFAULT use_syslog $SYSLOG iniset_rpc_backend tricircle $TRICIRCLE_DISPATCHER_CONF iniset $TRICIRCLE_DISPATCHER_CONF database connection `database_connection_url tricircle` + + iniset $TRICIRCLE_DISPATCHER_CONF client admin_username admin + iniset $TRICIRCLE_DISPATCHER_CONF client admin_password $ADMIN_PASSWORD + iniset $TRICIRCLE_DISPATCHER_CONF client admin_tenant demo + iniset $TRICIRCLE_DISPATCHER_CONF client auto_refresh_endpoint True + iniset $TRICIRCLE_DISPATCHER_CONF client top_site_name $OS_REGION_NAME fi } -function configure_tricircle_cascade_api { - echo "Configuring tricircle cascade api service" +function configure_tricircle_proxy { + if is_service_enabled t-prx ; then + echo "Configuring Tricircle Proxy" - if is_service_enabled t-svc-api ; then - cp -p $TRICIRCLE_DIR/etc/api.conf $TRICIRCLE_CASCADE_API_CONF - iniset $TRICIRCLE_CASCADE_API_CONF DEFAULT debug $ENABLE_DEBUG_LOG_LEVEL - iniset $TRICIRCLE_CASCADE_API_CONF DEFAULT verbose True - iniset $TRICIRCLE_CASCADE_API_CONF DEFAULT use_syslog $SYSLOG - iniset $TRICIRCLE_CASCADE_API_CONF database connection `database_connection_url tricircle` + cp -p $NOVA_CONF $TRICIRCLE_CONF_DIR + mv $TRICIRCLE_CONF_DIR/nova.conf $TRICIRCLE_PROXY_CONF + fi +} - iniset $TRICIRCLE_CASCADE_API_CONF client admin_username admin - iniset $TRICIRCLE_CASCADE_API_CONF client admin_password $ADMIN_PASSWORD - iniset $TRICIRCLE_CASCADE_API_CONF client admin_tenant demo - iniset $TRICIRCLE_CASCADE_API_CONF client auto_refresh_endpoint True - iniset $TRICIRCLE_CASCADE_API_CONF client top_site_name $OS_REGION_NAME +function configure_tricircle_api { + if is_service_enabled t-api ; then + echo "Configuring Tricircle API" - setup_colorized_logging $TRICIRCLE_CASCADE_API_CONF DEFAULT tenant_name + cp -p $TRICIRCLE_DIR/etc/api.conf $TRICIRCLE_API_CONF + iniset $TRICIRCLE_API_CONF DEFAULT debug $ENABLE_DEBUG_LOG_LEVEL + iniset $TRICIRCLE_API_CONF DEFAULT verbose True + iniset $TRICIRCLE_API_CONF DEFAULT use_syslog $SYSLOG + iniset $TRICIRCLE_API_CONF database connection `database_connection_url tricircle` + + iniset $TRICIRCLE_API_CONF client admin_username admin + iniset $TRICIRCLE_API_CONF client admin_password $ADMIN_PASSWORD + iniset $TRICIRCLE_API_CONF client admin_tenant demo + iniset $TRICIRCLE_API_CONF client auto_refresh_endpoint True + iniset $TRICIRCLE_API_CONF client top_site_name $OS_REGION_NAME + + setup_colorized_logging $TRICIRCLE_API_CONF DEFAULT tenant_name if is_service_enabled keystone; then create_tricircle_cache_dir # Configure auth token middleware - configure_auth_token_middleware $TRICIRCLE_CASCADE_API_CONF tricircle \ + configure_auth_token_middleware $TRICIRCLE_API_CONF tricircle \ $TRICIRCLE_AUTH_CACHE_DIR else - iniset $TRICIRCLE_CASCADE_API_CONF DEFAULT auth_strategy noauth + iniset $TRICIRCLE_API_CONF DEFAULT auth_strategy noauth fi fi @@ -113,10 +127,11 @@ if [[ "$Q_ENABLE_TRICIRCLE" == "True" ]]; then elif [[ "$1" == "stack" && "$2" == "post-config" ]]; then - echo_summary "Configure Tricircle" + echo_summary "Configuring Tricircle" - configure_tricircle_plugin - configure_tricircle_cascade_api + configure_tricircle_dispatcher + configure_tricircle_proxy + configure_tricircle_api echo export PYTHONPATH=\$PYTHONPATH:$TRICIRCLE_DIR >> $RC_DIR/.localrc.auto @@ -124,28 +139,36 @@ if [[ "$Q_ENABLE_TRICIRCLE" == "True" ]]; then python "$TRICIRCLE_DIR/cmd/manage.py" "$TRICIRCLE_DISPATCHER_CONF" elif [[ "$1" == "stack" && "$2" == "extra" ]]; then - echo_summary "Initializing Cascading Service" + echo_summary "Initializing Tricircle Service" - if is_service_enabled t-svc; then - run_process t-svc "python $TRICIRCLE_DISPATCHER --config-file $TRICIRCLE_DISPATCHER_CONF --config-dir $TRICIRCLE_CONF_DIR" + if is_service_enabled t-dis; then + run_process t-dis "python $TRICIRCLE_DISPATCHER --config-file $TRICIRCLE_DISPATCHER_CONF" fi - if is_service_enabled t-svc-api; then + if is_service_enabled t-prx; then + run_process t-prx "python $TRICIRCLE_PROXY --config-file $TRICIRCLE_PROXY_CONF" + fi + + if is_service_enabled t-api; then create_tricircle_accounts - run_process t-svc-api "python $TRICIRCLE_CASCADE_API --config-file $TRICIRCLE_CASCADE_API_CONF" + run_process t-api "python $TRICIRCLE_API --config-file $TRICIRCLE_API_CONF" fi fi if [[ "$1" == "unstack" ]]; then - if is_service_enabled t-svc; then - stop_process t-svc + if is_service_enabled t-dis; then + stop_process t-dis fi - if is_service_enabled t-svc-api; then - stop_process t-svc-api + if is_service_enabled t-prx; then + stop_process t-prx + fi + + if is_service_enabled t-api; then + stop_process t-api fi fi fi diff --git a/devstack/settings b/devstack/settings index 046c346c..e3f89549 100644 --- a/devstack/settings +++ b/devstack/settings @@ -6,19 +6,23 @@ TRICIRCLE_BRANCH=${TRICIRCLE_BRANCH:-master} # common variables TRICIRCLE_CONF_DIR=${TRICIRCLE_CONF_DIR:-/etc/tricircle} -# cascade service +# tricircle dispatcher TRICIRCLE_DISPATCHER=$TRICIRCLE_DIR/cmd/dispatcher.py TRICIRCLE_DISPATCHER_CONF=$TRICIRCLE_CONF_DIR/dispatcher.conf TRICIRCLE_DISPATCHER_LISTEN_ADDRESS=${TRICIRCLE_DISPATCHER_LISTEN_ADDRESS:-0.0.0.0} -# cascade rest api -TRICIRCLE_CASCADE_API=$TRICIRCLE_DIR/cmd/api.py -TRICIRCLE_CASCADE_API_CONF=$TRICIRCLE_CONF_DIR/api.conf +# tricircle proxy +TRICIRCLE_PROXY=$TRICIRCLE_DIR/cmd/proxy.py +TRICIRCLE_PROXY_CONF=$TRICIRCLE_CONF_DIR/proxy.conf -TRICIRCLE_CASCADE_API_LISTEN_ADDRESS=${TRICIRCLE_CASCADE_API_LISTEN_ADDRESS:-0.0.0.0} -TRICIRCLE_CASCADE_API_HOST=${TRICIRCLE_CASCADE_API_HOST:-$SERVICE_HOST} -TRICIRCLE_CASCADE_API_PORT=${TRICIRCLE_CASCADE_API_PORT:-19999} -TRICIRCLE_CASCADE_API_PROTOCOL=${TRICIRCLE_CASCADE_API_PROTOCOL:-$SERVICE_PROTOCOL} +# tricircle rest api +TRICIRCLE_API=$TRICIRCLE_DIR/cmd/api.py +TRICIRCLE_API_CONF=$TRICIRCLE_CONF_DIR/api.conf + +TRICIRCLE_API_LISTEN_ADDRESS=${TRICIRCLE_API_LISTEN_ADDRESS:-0.0.0.0} +TRICIRCLE_API_HOST=${TRICIRCLE_API_HOST:-$SERVICE_HOST} +TRICIRCLE_API_PORT=${TRICIRCLE_API_PORT:-19999} +TRICIRCLE_API_PROTOCOL=${TRICIRCLE_API_PROTOCOL:-$SERVICE_PROTOCOL} TRICIRCLE_AUTH_CACHE_DIR=${TRICIRCLE_AUTH_CACHE_DIR:-/var/cache/tricircle} diff --git a/tricircle/api/app.py b/tricircle/api/app.py index 6cabfc59..ed756dd4 100755 --- a/tricircle/api/app.py +++ b/tricircle/api/app.py @@ -58,7 +58,9 @@ def _wrap_app(app): if cfg.CONF.auth_strategy == 'noauth': pass elif cfg.CONF.auth_strategy == 'keystone': - app = auth_token.AuthProtocol(app, {}) + # NOTE(zhiyuan) pkg_resources will try to load tricircle to get module + # version, passing "project" as empty string to bypass it + app = auth_token.AuthProtocol(app, {'project': ''}) else: raise t_exc.InvalidConfigurationOption( opt_name='auth_strategy', opt_value=cfg.CONF.auth_strategy) diff --git a/tricircle/common/nova_lib.py b/tricircle/common/nova_lib.py new file mode 100644 index 00000000..c66e7f3a --- /dev/null +++ b/tricircle/common/nova_lib.py @@ -0,0 +1,65 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import nova.block_device +import nova.cloudpipe.pipelib +import nova.compute.manager +import nova.compute.task_states +import nova.compute.utils +import nova.compute.vm_states +import nova.conductor +import nova.conductor.rpcapi +import nova.context +import nova.db.api +import nova.exception +import nova.manager +import nova.network +import nova.network.model +import nova.network.security_group.openstack_driver +import nova.objects +import nova.objects.base +import nova.quota +import nova.rpc +import nova.service +import nova.utils +import nova.version +import nova.virt.block_device +import nova.volume + + +block_device = nova.block_device +pipelib = nova.cloudpipe.pipelib +compute_manager = nova.compute.manager +task_states = nova.compute.task_states +vm_states = nova.compute.vm_states +compute_utils = nova.compute.utils +conductor = nova.conductor +conductor_rpcapi = nova.conductor.rpcapi +context = nova.context +db_api = nova.db.api +exception = nova.exception +manager = nova.manager +network = nova.network +network_model = nova.network.model +openstack_driver = nova.network.security_group.openstack_driver +objects = nova.objects +objects_base = nova.objects.base +quota = nova.quota +rpc = nova.rpc +service = nova.service +utils = nova.utils +driver_block_device = nova.virt.block_device +volume = nova.volume +version = nova.version diff --git a/tricircle/common/service.py b/tricircle/common/service.py new file mode 100644 index 00000000..f9c4cdf6 --- /dev/null +++ b/tricircle/common/service.py @@ -0,0 +1,80 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from tricircle.common.nova_lib import rpc as nova_rpc +from tricircle.common.nova_lib import service as nova_service +from tricircle.common.nova_lib import version as nova_version + + +def fix_compute_service_exchange(service): + """Fix service exchange value for nova""" + + _manager = service.manager + + client_paths = [ + ('compute_rpcapi', 'client'), + ('compute_task_api', 'conductor_compute_rpcapi', 'client'), + ('consoleauth_rpcapi', 'client'), + ('scheduler_client', 'queryclient', 'scheduler_rpcapi', 'client'), + ('proxy_client',), + ('conductor_api', '_manager', 'client') + ] + for client_path in client_paths: + if not hasattr(_manager, client_path[0]): + continue + obj = getattr(_manager, client_path[0]) + for part in client_path[1:]: + obj = getattr(obj, part) + obj.target.exchange = 'nova' + + +def _patch_nova_service(): + if nova_version.loaded: + return + + nova_version.NOVA_PACKAGE = "tricircle" + nova_rpc.TRANSPORT.conf.set_override('control_exchange', 'nova') + nova_version.loaded = True + + +class NovaService(nova_service.Service): + def __init__(self, *args, **kwargs): + _patch_nova_service() + self._conductor_api = None + self._rpcserver = None + super(NovaService, self).__init__(*args, **kwargs) + + @property + def conductor_api(self): + return self._conductor_api + + @conductor_api.setter + def conductor_api(self, value): + self._conductor_api = value + for client in ( + self._conductor_api.base_rpcapi.client, + self._conductor_api._manager.client, + ): + client.target.exchange = "nova" + + @property + def rpcserver(self): + return self._rpcserver + + @rpcserver.setter + def rpcserver(self, value): + self._rpcserver = value + if value is not None: + value.dispatcher._target.exchange = "nova" diff --git a/tricircle/common/utils.py b/tricircle/common/utils.py index da59d3bd..53988ddb 100644 --- a/tricircle/common/utils.py +++ b/tricircle/common/utils.py @@ -24,3 +24,7 @@ def get_ag_name(site_name): def get_az_name(site_name): return 'az_%s' % site_name + + +def get_node_name(site_name): + return "cascade_%s" % site_name diff --git a/tricircle/dispatcher/compute.py b/tricircle/dispatcher/compute.py deleted file mode 100644 index 14dc5e4a..00000000 --- a/tricircle/dispatcher/compute.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2015 Huawei Technologies Co., Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nova.compute.manager import ComputeManager -from nova.virt.fake import FakeDriver - -import nova.rpc as nova_rpc -from nova.service import Service -import nova.version as nova_version - -from tricircle.common.utils import get_import_path - -_REPORT_INTERVAL = 30 -_REPORT_INTERVAL_MAX = 60 - - -def _patch_nova_service(): - if nova_version.loaded: - return - - nova_version.NOVA_PACKAGE = "tricircle" - nova_rpc.TRANSPORT.conf.set_override('control_exchange', 'nova') - nova_version.loaded = True - - -class NovaService(Service): - def __init__(self, *args, **kwargs): - _patch_nova_service() - self._conductor_api = None - self._rpcserver = None - super(NovaService, self).__init__(*args, **kwargs) - - @property - def conductor_api(self): - return self._conductor_api - - @conductor_api.setter - def conductor_api(self, value): - self._conductor_api = value - for client in ( - self._conductor_api.base_rpcapi.client, - self._conductor_api._manager.client, - ): - client.target.exchange = "nova" - - @property - def rpcserver(self): - return self._rpcserver - - @rpcserver.setter - def rpcserver(self, value): - self._rpcserver = value - if value is not None: - value.dispatcher._target.exchange = "nova" - - -def _fix_compute_service_exchange(service): - """Fix service exchange value for nova""" - - manager = service.manager - for client in ( - manager.compute_rpcapi.client, - manager.compute_task_api.conductor_compute_rpcapi.client, - manager.consoleauth_rpcapi.client, - # manager.scheduler_client.queryclient.scheduler_rpcapi.client, - ): - client.target.exchange = "nova" - - -class ComputeHostManager(object): - def __init__(self, site_manager): - self._compute_nodes = [] - - def _create_compute_node_service(self, host): - service = NovaService( - host=host, - binary="nova-compute", - topic="compute", # TODO(saggi): get from conf - db_allowed=False, - periodic_enable=True, - report_interval=_REPORT_INTERVAL, - periodic_interval_max=_REPORT_INTERVAL_MAX, - manager=get_import_path(ComputeManager), - # temporally use FakeDriver, new compute manager doesn't require - # compute driver so this can be removed after new compute manager - # is finished - compute_driver=get_import_path(FakeDriver) - ) - - _fix_compute_service_exchange(service) - - return service - - def create_host_adapter(self, host): - """Creates an adapter between the nova compute API and Site object""" - service = self._create_compute_node_service(host) - service.start() - self._compute_nodes.append(service) diff --git a/tricircle/dispatcher/compute_manager.py b/tricircle/dispatcher/compute_manager.py new file mode 100644 index 00000000..c1ee8d31 --- /dev/null +++ b/tricircle/dispatcher/compute_manager.py @@ -0,0 +1,126 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import oslo_log.log as logging +import oslo_messaging as messaging + +from tricircle.common.i18n import _LI +from tricircle.common.i18n import _LW +from tricircle.common.nova_lib import context as nova_context +from tricircle.common.nova_lib import exception +from tricircle.common.nova_lib import manager +from tricircle.common.nova_lib import objects +from tricircle.common.nova_lib import objects_base +from tricircle.common.nova_lib import rpc as nova_rpc +from tricircle.common import utils + +LOG = logging.getLogger(__name__) + + +class DispatcherComputeManager(manager.Manager): + + target = messaging.Target(version='4.0') + + def __init__(self, site_manager=None, *args, **kwargs): + self._site_manager = site_manager + + target = messaging.Target(topic="proxy", version='4.0') + serializer = objects_base.NovaObjectSerializer() + self.proxy_client = nova_rpc.get_client(target, '4.0', serializer) + + super(DispatcherComputeManager, self).__init__(service_name="compute", + *args, **kwargs) + + def _get_compute_node(self, context): + """Returns compute node for the host and nodename.""" + try: + return objects.ComputeNode.get_by_host_and_nodename( + context, self.host, utils.get_node_name(self.host)) + except exception.NotFound: + LOG.warning(_LW("No compute node record for %(host)s:%(node)s"), + {'host': self.host, + 'node': utils.get_node_name(self.host)}) + + def _copy_resources(self, compute_node, resources): + """Copy resource values to initialise compute_node""" + + # update the allocation ratios for the related ComputeNode object + compute_node.ram_allocation_ratio = 1 + compute_node.cpu_allocation_ratio = 1 + + # now copy rest to compute_node + for key in resources: + compute_node[key] = resources[key] + + def _init_compute_node(self, context, resources): + """Initialise the compute node if it does not already exist. + + The nova scheduler will be inoperable if compute_node + is not defined. The compute_node will remain undefined if + we fail to create it or if there is no associated service + registered. + If this method has to create a compute node it needs initial + values - these come from resources. + :param context: security context + :param resources: initial values + """ + + # try to get the compute node record from the + # database. If we get one we use resources to initialize + compute_node = self._get_compute_node(context) + if compute_node: + self._copy_resources(compute_node, resources) + compute_node.save() + return + + # there was no local copy and none in the database + # so we need to create a new compute node. This needs + # to be initialised with resource values. + compute_node = objects.ComputeNode(context) + service = objects.Service.get_by_host_and_binary( + context, self.host, 'nova-compute') + compute_node.host = self.host + compute_node.service_id = service['id'] + self._copy_resources(compute_node, resources) + compute_node.create() + LOG.info(_LI('Compute_service record created for ' + '%(host)s:%(node)s'), + {'host': self.host, 'node': utils.get_node_name(self.host)}) + + # NOTE(zhiyuan) register fake compute node information in db so nova + # scheduler can properly select destination + def pre_start_hook(self): + site = self._site_manager.get_site(self.host) + node = site.get_nodes()[0] + resources = node.get_available_resource() + context = nova_context.get_admin_context() + self._init_compute_node(context, resources) + + def build_and_run_instance(self, context, instance, image, request_spec, + filter_properties, admin_password=None, + injected_files=None, requested_networks=None, + security_groups=None, block_device_mapping=None, + node=None, limits=None): + version = '4.0' + cctxt = self.proxy_client.prepare(version=version) + cctxt.cast(context, 'build_and_run_instance', host=self.host, + instance=instance, image=image, request_spec=request_spec, + filter_properties=filter_properties, + admin_password=admin_password, + injected_files=injected_files, + requested_networks=requested_networks, + security_groups=security_groups, + block_device_mapping=block_device_mapping, node=node, + limits=limits) diff --git a/tricircle/dispatcher/host_manager.py b/tricircle/dispatcher/host_manager.py new file mode 100644 index 00000000..7f2ce78b --- /dev/null +++ b/tricircle/dispatcher/host_manager.py @@ -0,0 +1,50 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tricircle.common.service as t_service +from tricircle.common.utils import get_import_path +from tricircle.dispatcher.compute_manager import DispatcherComputeManager + +_REPORT_INTERVAL = 30 +_REPORT_INTERVAL_MAX = 60 + + +class ComputeHostManager(object): + def __init__(self, site_manager): + self._compute_nodes = [] + self._site_manager = site_manager + + def _create_compute_node_service(self, host): + service = t_service.NovaService( + host=host, + binary="nova-compute", + topic="compute", # TODO(saggi): get from conf + db_allowed=False, + periodic_enable=True, + report_interval=_REPORT_INTERVAL, + periodic_interval_max=_REPORT_INTERVAL_MAX, + manager=get_import_path(DispatcherComputeManager), + site_manager=self._site_manager + ) + + t_service.fix_compute_service_exchange(service) + + return service + + def create_host_adapter(self, host): + """Creates an adapter between the nova compute API and Site object""" + service = self._create_compute_node_service(host) + service.start() + self._compute_nodes.append(service) diff --git a/tricircle/dispatcher/site_manager.py b/tricircle/dispatcher/site_manager.py index 8c24f713..db43b357 100644 --- a/tricircle/dispatcher/site_manager.py +++ b/tricircle/dispatcher/site_manager.py @@ -12,15 +12,13 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. -# TODO(saggi) change to oslo before release -from oslo_serialization import jsonutils as json import tricircle.common.context as t_context from tricircle.common.singleton import Singleton from tricircle.common import utils from tricircle.db import client from tricircle.db import models -from tricircle.dispatcher.compute import ComputeHostManager +from tricircle.dispatcher.host_manager import ComputeHostManager class Node(object): @@ -40,11 +38,11 @@ class Node(object): self.running_vms = 0 self.cpu_info = "" self.disk_available_least = 1 - self.supported_instances = [] + self.supported_hv_specs = [] self.metrics = None self.pci_stats = None self.extra_resources = None - self.stats = json.dumps({}) + self.stats = {} self.numa_topology = None def get_available_resource(self): @@ -64,12 +62,12 @@ class Node(object): "running_vms": self.running_vms, "cpu_info": self.cpu_info, "disk_available_least": self.disk_available_least, - "supported_instances": self.supported_instances, + "supported_hv_specs": self.supported_hv_specs, "metrics": self.metrics, "pci_stats": self.pci_stats, "extra_resources": self.extra_resources, - "stats": (self.stats), - "numa_topology": (self.numa_topology), + "stats": self.stats, + "numa_topology": self.numa_topology, } @@ -79,7 +77,7 @@ class Site(object): # We currently just hold one aggregate subnode representing the # resources owned by all the site's nodes. - self._aggragate_node = Node("cascade_" + name) + self._aggragate_node = Node(utils.get_node_name(name)) self._instance_launch_information = {} diff --git a/tricircle/networking/plugin.py b/tricircle/networking/plugin.py index b9d87331..779e83b3 100644 --- a/tricircle/networking/plugin.py +++ b/tricircle/networking/plugin.py @@ -131,7 +131,7 @@ class TricirclePlugin(db_base_plugin_v2.NeutronDbPluginV2, if self._cascading_rpc_api: self._cascading_rpc_api.delete_port(context, port_id, - l3_port_checki=True) + l3_port_check=True) return ret_val diff --git a/tricircle/proxy/__init__.py b/tricircle/proxy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tricircle/proxy/compute_manager.py b/tricircle/proxy/compute_manager.py new file mode 100644 index 00000000..8f88f379 --- /dev/null +++ b/tricircle/proxy/compute_manager.py @@ -0,0 +1,751 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import contextlib +import functools +import six +import sys +import time +import traceback + +from oslo_config import cfg +import oslo_log.log as logging +import oslo_messaging as messaging +from oslo_utils import excutils +from oslo_utils import strutils + +from tricircle.common.i18n import _ +from tricircle.common.i18n import _LE +from tricircle.common.i18n import _LW +from tricircle.common.nova_lib import block_device +from tricircle.common.nova_lib import compute_manager +from tricircle.common.nova_lib import compute_utils +from tricircle.common.nova_lib import conductor +from tricircle.common.nova_lib import driver_block_device +from tricircle.common.nova_lib import exception +from tricircle.common.nova_lib import manager +from tricircle.common.nova_lib import network +from tricircle.common.nova_lib import network_model +from tricircle.common.nova_lib import objects +from tricircle.common.nova_lib import openstack_driver +from tricircle.common.nova_lib import pipelib +from tricircle.common.nova_lib import rpc +from tricircle.common.nova_lib import task_states +from tricircle.common.nova_lib import utils +from tricircle.common.nova_lib import vm_states +from tricircle.common.nova_lib import volume +import tricircle.common.utils as t_utils + + +CONF = cfg.CONF + +compute_opts = [ + cfg.StrOpt('default_access_ip_network_name', + help='Name of network to use to set access IPs for instances'), + cfg.IntOpt('network_allocate_retries', + default=0, + help="Number of times to retry network allocation on failures"), +] +CONF.register_opts(compute_opts) + + +LOG = logging.getLogger(__name__) + +SERVICE_NAME = 'proxy_compute' + +get_notifier = functools.partial(rpc.get_notifier, service=SERVICE_NAME) +wrap_exception = functools.partial(exception.wrap_exception, + get_notifier=get_notifier) +reverts_task_state = compute_manager.reverts_task_state +wrap_instance_fault = compute_manager.wrap_instance_fault +wrap_instance_event = compute_manager.wrap_instance_event + + +class ProxyComputeManager(manager.Manager): + + target = messaging.Target(version='4.0') + + def __init__(self, *args, **kwargs): + self.is_neutron_security_groups = ( + openstack_driver.is_neutron_security_groups()) + self.use_legacy_block_device_info = False + + self.network_api = network.API() + self.volume_api = volume.API() + self.conductor_api = conductor.API() + self.compute_task_api = conductor.ComputeTaskAPI() + + super(ProxyComputeManager, self).__init__( + service_name=SERVICE_NAME, *args, **kwargs) + + def _decode_files(self, injected_files): + """Base64 decode the list of files to inject.""" + if not injected_files: + return [] + + def _decode(f): + path, contents = f + try: + decoded = base64.b64decode(contents) + return path, decoded + except TypeError: + raise exception.Base64Exception(path=path) + + return [_decode(f) for f in injected_files] + + def _cleanup_allocated_networks(self, context, instance, + requested_networks): + try: + self._deallocate_network(context, instance, requested_networks) + except Exception: + msg = _LE('Failed to deallocate networks') + LOG.exception(msg, instance=instance) + return + + instance.system_metadata['network_allocated'] = 'False' + try: + instance.save() + except exception.InstanceNotFound: + pass + + def _deallocate_network(self, context, instance, + requested_networks=None): + LOG.debug('Deallocating network for instance', instance=instance) + self.network_api.deallocate_for_instance( + context, instance, requested_networks=requested_networks) + + def _cleanup_volumes(self, context, instance_uuid, bdms, raise_exc=True): + exc_info = None + + for bdm in bdms: + LOG.debug("terminating bdm %s", bdm, + instance_uuid=instance_uuid) + if bdm.volume_id and bdm.delete_on_termination: + try: + self.volume_api.delete(context, bdm.volume_id) + except Exception as exc: + exc_info = sys.exc_info() + LOG.warn(_LW('Failed to delete volume: %(volume_id)s due ' + 'to %(exc)s'), {'volume_id': bdm.volume_id, + 'exc': unicode(exc)}) + if exc_info is not None and raise_exc: + six.reraise(exc_info[0], exc_info[1], exc_info[2]) + + def _instance_update(self, context, instance, **kwargs): + """Update an instance in the database using kwargs as value.""" + + for k, v in kwargs.items(): + setattr(instance, k, v) + instance.save() + + def _set_instance_obj_error_state(self, context, instance, + clean_task_state=False): + try: + instance.vm_state = vm_states.ERROR + if clean_task_state: + instance.task_state = None + instance.save() + except exception.InstanceNotFound: + LOG.debug('Instance has been destroyed from under us while ' + 'trying to set it to ERROR', instance=instance) + + def _notify_about_instance_usage(self, context, instance, event_suffix, + network_info=None, system_metadata=None, + extra_usage_info=None, fault=None): + compute_utils.notify_about_instance_usage( + self.notifier, context, instance, event_suffix, + network_info=network_info, + system_metadata=system_metadata, + extra_usage_info=extra_usage_info, fault=fault) + + def _validate_instance_group_policy(self, context, instance, + filter_properties): + # NOTE(russellb) Instance group policy is enforced by the scheduler. + # However, there is a race condition with the enforcement of + # anti-affinity. Since more than one instance may be scheduled at the + # same time, it's possible that more than one instance with an + # anti-affinity policy may end up here. This is a validation step to + # make sure that starting the instance here doesn't violate the policy. + + scheduler_hints = filter_properties.get('scheduler_hints') or {} + group_hint = scheduler_hints.get('group') + if not group_hint: + return + + @utils.synchronized(group_hint) + def _do_validation(context, instance, group_hint): + group = objects.InstanceGroup.get_by_hint(context, group_hint) + if 'anti-affinity' not in group.policies and ( + 'affinity' not in group.policies): + return + + group_hosts = group.get_hosts(context, exclude=[instance.uuid]) + if self.host in group_hosts: + if 'anti-affinity' in group.policies: + msg = _("Anti-affinity instance group policy " + "was violated.") + raise exception.RescheduledException( + instance_uuid=instance.uuid, + reason=msg) + elif group_hosts and [self.host] != group_hosts: + # NOTE(huawei) Native code only considered anti-affinity + # policy, but affinity policy also have the same problem. + # so we add checker for affinity policy instance. + if 'affinity' in group.policies: + msg = _("affinity instance group policy was violated.") + raise exception.RescheduledException( + instance_uuid=instance.uuid, + reason=msg) + + _do_validation(context, instance, group_hint) + + @wrap_exception() + @reverts_task_state + @wrap_instance_fault + def build_and_run_instance( + self, context, host, instance, image, request_spec, + filter_properties, admin_password=None, injected_files=None, + requested_networks=None, security_groups=None, + block_device_mapping=None, node=None, limits=None): + + if (requested_networks and + not isinstance(requested_networks, + objects.NetworkRequestList)): + requested_networks = objects.NetworkRequestList( + objects=[objects.NetworkRequest.from_tuple(t) + for t in requested_networks]) + + @utils.synchronized(instance.uuid) + def _locked_do_build_and_run_instance(*args, **kwargs): + self._do_build_and_run_instance(*args, **kwargs) + + utils.spawn_n(_locked_do_build_and_run_instance, + context, host, instance, image, request_spec, + filter_properties, admin_password, injected_files, + requested_networks, security_groups, + block_device_mapping, node, limits) + + @wrap_exception() + @reverts_task_state + @wrap_instance_event + @wrap_instance_fault + def _do_build_and_run_instance(self, context, host, instance, image, + request_spec, filter_properties, + admin_password, injected_files, + requested_networks, security_groups, + block_device_mapping, node=None, + limits=None): + + try: + LOG.debug(_('Starting instance...'), context=context, + instance=instance) + instance.vm_state = vm_states.BUILDING + instance.task_state = None + instance.save(expected_task_state=(task_states.SCHEDULING, None)) + except exception.InstanceNotFound: + msg = 'Instance disappeared before build.' + LOG.debug(msg, instance=instance) + return + except exception.UnexpectedTaskStateError as e: + LOG.debug(e.format_message(), instance=instance) + return + + # b64 decode the files to inject: + decoded_files = self._decode_files(injected_files) + + if limits is None: + limits = {} + + if node is None: + node = t_utils.get_node_name(host) + LOG.debug('No node specified, defaulting to %s', node, + instance=instance) + + try: + self._build_and_run_instance( + context, host, instance, image, request_spec, decoded_files, + admin_password, requested_networks, security_groups, + block_device_mapping, node, limits, filter_properties) + except exception.RescheduledException as e: + LOG.debug(e.format_message(), instance=instance) + retry = filter_properties.get('retry', None) + if not retry: + # no retry information, do not reschedule. + LOG.debug("Retry info not present, will not reschedule", + instance=instance) + self._cleanup_allocated_networks(context, instance, + requested_networks) + compute_utils.add_instance_fault_from_exc( + context, instance, e, sys.exc_info()) + self._set_instance_obj_error_state(context, instance, + clean_task_state=True) + return + retry['exc'] = traceback.format_exception(*sys.exc_info()) + + self.network_api.cleanup_instance_network_on_host( + context, instance, self.host) + + instance.task_state = task_states.SCHEDULING + instance.save() + + self.compute_task_api.build_instances( + context, [instance], image, filter_properties, admin_password, + injected_files, requested_networks, security_groups, + block_device_mapping) + except (exception.InstanceNotFound, + exception.UnexpectedDeletingTaskStateError): + msg = 'Instance disappeared during build.' + LOG.debug(msg, instance=instance) + self._cleanup_allocated_networks(context, instance, + requested_networks) + except exception.BuildAbortException as e: + LOG.exception(e.format_message(), instance=instance) + self._cleanup_allocated_networks(context, instance, + requested_networks) + self._cleanup_volumes(context, instance.uuid, + block_device_mapping, raise_exc=False) + compute_utils.add_instance_fault_from_exc( + context, instance, e, sys.exc_info()) + self._set_instance_obj_error_state(context, instance, + clean_task_state=True) + except Exception as e: + # should not reach here. + msg = _LE('Unexpected build failure, not rescheduling build.') + LOG.exception(msg, instance=instance) + self._cleanup_allocated_networks(context, instance, + requested_networks) + self._cleanup_volumes(context, instance.uuid, + block_device_mapping, raise_exc=False) + compute_utils.add_instance_fault_from_exc(context, instance, + e, sys.exc_info()) + self._set_instance_obj_error_state(context, instance, + clean_task_state=True) + + def _get_instance_nw_info(self, context, instance, use_slave=False): + """Get a list of dictionaries of network data of an instance.""" + return self.network_api.get_instance_nw_info(context, instance, + use_slave=use_slave) + + def _allocate_network(self, context, instance, requested_networks, macs, + security_groups, dhcp_options): + """Start network allocation asynchronously. + + Return an instance of NetworkInfoAsyncWrapper that can be used to + retrieve the allocated networks when the operation has finished. + """ + # NOTE(comstud): Since we're allocating networks asynchronously, + # this task state has little meaning, as we won't be in this + # state for very long. + instance.vm_state = vm_states.BUILDING + instance.task_state = task_states.NETWORKING + instance.save(expected_task_state=[None]) + + is_vpn = pipelib.is_vpn_image(instance.image_ref) + return network_model.NetworkInfoAsyncWrapper( + self._allocate_network_async, context, instance, + requested_networks, macs, security_groups, is_vpn, dhcp_options) + + def _allocate_network_async(self, context, instance, requested_networks, + macs, security_groups, is_vpn, dhcp_options): + """Method used to allocate networks in the background. + + Broken out for testing. + """ + LOG.debug("Allocating IP information in the background.", + instance=instance) + retries = CONF.network_allocate_retries + if retries < 0: + LOG.warn(_("Treating negative config value (%(retries)s) for " + "'network_allocate_retries' as 0."), + {'retries': retries}) + attempts = retries > 1 and retries + 1 or 1 + retry_time = 1 + for attempt in range(1, attempts + 1): + try: + nwinfo = self.network_api.allocate_for_instance( + context, instance, vpn=is_vpn, + requested_networks=requested_networks, + macs=macs, security_groups=security_groups, + dhcp_options=dhcp_options) + LOG.debug('Instance network_info: |%s|', nwinfo, + instance=instance) + instance.system_metadata['network_allocated'] = 'True' + # NOTE(JoshNang) do not save the instance here, as it can cause + # races. The caller shares a reference to instance and waits + # for this async greenthread to finish before calling + # instance.save(). + return nwinfo + except Exception: + exc_info = sys.exc_info() + log_info = {'attempt': attempt, + 'attempts': attempts} + if attempt == attempts: + LOG.exception(_LE('Instance failed network setup ' + 'after %(attempts)d attempt(s)'), + log_info) + raise exc_info[0], exc_info[1], exc_info[2] + LOG.warn(_('Instance failed network setup ' + '(attempt %(attempt)d of %(attempts)d)'), + log_info, instance=instance) + time.sleep(retry_time) + retry_time *= 2 + if retry_time > 30: + retry_time = 30 + # Not reached. + + def _build_networks_for_instance(self, context, instance, + requested_networks, security_groups): + + # If we're here from a reschedule the network may already be allocated. + if strutils.bool_from_string( + instance.system_metadata.get('network_allocated', 'False')): + # NOTE(alex_xu): The network_allocated is True means the network + # resource already allocated at previous scheduling, and the + # network setup is cleanup at previous. After rescheduling, the + # network resource need setup on the new host. + self.network_api.setup_instance_network_on_host( + context, instance, instance.host) + return self._get_instance_nw_info(context, instance) + + if not self.is_neutron_security_groups: + security_groups = [] + + # NOTE(zhiyuan) in ComputeManager, driver method "macs_for_instance" + # and "dhcp_options_for_instance" are called to get macs and + # dhcp_options, here we just set them to None + macs = None + dhcp_options = None + network_info = self._allocate_network(context, instance, + requested_networks, macs, + security_groups, dhcp_options) + + if not instance.access_ip_v4 and not instance.access_ip_v6: + # If CONF.default_access_ip_network_name is set, grab the + # corresponding network and set the access ip values accordingly. + # Note that when there are multiple ips to choose from, an + # arbitrary one will be chosen. + network_name = CONF.default_access_ip_network_name + if not network_name: + return network_info + + for vif in network_info: + if vif['network']['label'] == network_name: + for ip in vif.fixed_ips(): + if ip['version'] == 4: + instance.access_ip_v4 = ip['address'] + if ip['version'] == 6: + instance.access_ip_v6 = ip['address'] + instance.save() + break + + return network_info + + # NOTE(zhiyuan) the task of this function is to do some preparation job + # for driver and cinder volume, but in nova proxy _proxy_run_instance will + # do such job, remove this function after cinder proxy is ready and we + # confirm it is useless + def _prep_block_device(self, context, instance, bdms, + do_check_attach=True): + """Set up the block device for an instance with error logging.""" + try: + block_device_info = { + 'root_device_name': instance['root_device_name'], + 'swap': driver_block_device.convert_swap(bdms), + 'ephemerals': driver_block_device.convert_ephemerals(bdms), + 'block_device_mapping': ( + driver_block_device.attach_block_devices( + driver_block_device.convert_volumes(bdms), + context, instance, self.volume_api, + self.driver, do_check_attach=do_check_attach) + + driver_block_device.attach_block_devices( + driver_block_device.convert_snapshots(bdms), + context, instance, self.volume_api, + self.driver, self._await_block_device_map_created, + do_check_attach=do_check_attach) + + driver_block_device.attach_block_devices( + driver_block_device.convert_images(bdms), + context, instance, self.volume_api, + self.driver, self._await_block_device_map_created, + do_check_attach=do_check_attach) + + driver_block_device.attach_block_devices( + driver_block_device.convert_blanks(bdms), + context, instance, self.volume_api, + self.driver, self._await_block_device_map_created, + do_check_attach=do_check_attach)) + } + + if self.use_legacy_block_device_info: + for bdm_type in ('swap', 'ephemerals', 'block_device_mapping'): + block_device_info[bdm_type] = \ + driver_block_device.legacy_block_devices( + block_device_info[bdm_type]) + + # Get swap out of the list + block_device_info['swap'] = driver_block_device.get_swap( + block_device_info['swap']) + return block_device_info + + except exception.OverQuota: + msg = _LW('Failed to create block device for instance due to ' + 'being over volume resource quota') + LOG.warn(msg, instance=instance) + raise exception.InvalidBDM() + + except Exception: + LOG.exception(_LE('Instance failed block device setup'), + instance=instance) + raise exception.InvalidBDM() + + def _default_block_device_names(self, context, instance, + image_meta, block_devices): + """Verify that all the devices have the device_name set. + + If not, provide a default name. It also ensures that there is a + root_device_name and is set to the first block device in the boot + sequence (boot_index=0). + """ + root_bdm = block_device.get_root_bdm(block_devices) + if not root_bdm: + return + + # Get the root_device_name from the root BDM or the instance + root_device_name = None + update_instance = False + update_root_bdm = False + + if root_bdm.device_name: + root_device_name = root_bdm.device_name + instance.root_device_name = root_device_name + update_instance = True + elif instance.root_device_name: + root_device_name = instance.root_device_name + root_bdm.device_name = root_device_name + update_root_bdm = True + else: + # NOTE(zhiyuan) if driver doesn't implement related function, + # function in compute_utils will be called + root_device_name = compute_utils.get_next_device_name(instance, []) + + instance.root_device_name = root_device_name + root_bdm.device_name = root_device_name + update_instance = update_root_bdm = True + + if update_instance: + instance.save() + if update_root_bdm: + root_bdm.save() + + ephemerals = filter(block_device.new_format_is_ephemeral, + block_devices) + swap = filter(block_device.new_format_is_swap, + block_devices) + block_device_mapping = filter( + driver_block_device.is_block_device_mapping, block_devices) + + # NOTE(zhiyuan) if driver doesn't implement related function, + # function in compute_utils will be called + compute_utils.default_device_names_for_instance( + instance, root_device_name, ephemerals, swap, block_device_mapping) + + @contextlib.contextmanager + def _build_resources(self, context, instance, requested_networks, + security_groups, image, block_device_mapping): + resources = {} + network_info = None + try: + network_info = self._build_networks_for_instance( + context, instance, requested_networks, security_groups) + resources['network_info'] = network_info + except (exception.InstanceNotFound, + exception.UnexpectedDeletingTaskStateError): + raise + except exception.UnexpectedTaskStateError as e: + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=e.format_message()) + except Exception: + # Because this allocation is async any failures are likely to occur + # when the driver accesses network_info during spawn(). + LOG.exception(_LE('Failed to allocate network(s)'), + instance=instance) + msg = _('Failed to allocate the network(s), not rescheduling.') + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=msg) + + try: + # Verify that all the BDMs have a device_name set and assign a + # default to the ones missing it with the help of the driver. + self._default_block_device_names(context, instance, image, + block_device_mapping) + + instance.vm_state = vm_states.BUILDING + instance.task_state = task_states.BLOCK_DEVICE_MAPPING + instance.save() + + # NOTE(zhiyuan) remove this commented code after cinder proxy is + # ready and we confirm _prep_block_device is useless + # + # block_device_info = self._prep_block_device( + # context, instance, block_device_mapping) + # + block_device_info = None + resources['block_device_info'] = block_device_info + except (exception.InstanceNotFound, + exception.UnexpectedDeletingTaskStateError): + with excutils.save_and_reraise_exception() as ctxt: + # Make sure the async call finishes + if network_info is not None: + network_info.wait(do_raise=False) + except exception.UnexpectedTaskStateError as e: + # Make sure the async call finishes + if network_info is not None: + network_info.wait(do_raise=False) + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=e.format_message()) + except Exception: + LOG.exception(_LE('Failure prepping block device'), + instance=instance) + # Make sure the async call finishes + if network_info is not None: + network_info.wait(do_raise=False) + msg = _('Failure prepping block device.') + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=msg) + + self._heal_proxy_networks(context, instance, network_info) + cascaded_ports = self._heal_proxy_ports( + context, instance, network_info) + resources['cascaded_ports'] = cascaded_ports + + try: + yield resources + except Exception as exc: + with excutils.save_and_reraise_exception() as ctxt: + if not isinstance(exc, ( + exception.InstanceNotFound, + exception.UnexpectedDeletingTaskStateError)): + LOG.exception(_LE('Instance failed to spawn'), + instance=instance) + # Make sure the async call finishes + if network_info is not None: + network_info.wait(do_raise=False) + try: + self._shutdown_instance(context, instance, + block_device_mapping, + requested_networks, + try_deallocate_networks=False) + except Exception: + ctxt.reraise = False + msg = _('Could not clean up failed build,' + ' not rescheduling') + raise exception.BuildAbortException( + instance_uuid=instance.uuid, reason=msg) + + def _build_and_run_instance(self, context, host, instance, image, + request_spec, injected_files, admin_password, + requested_networks, security_groups, + block_device_mapping, node, limits, + filter_properties): + + image_name = image.get('name') + self._notify_about_instance_usage(context, instance, 'create.start', + extra_usage_info={ + 'image_name': image_name}) + try: + self._validate_instance_group_policy(context, instance, + filter_properties) + with self._build_resources(context, instance, requested_networks, + security_groups, image, + block_device_mapping) as resources: + instance.vm_state = vm_states.BUILDING + instance.task_state = task_states.SPAWNING + instance.save( + expected_task_state=task_states.BLOCK_DEVICE_MAPPING) + cascaded_ports = resources['cascaded_ports'] + request_spec['block_device_mapping'] = block_device_mapping + request_spec['security_group'] = security_groups + self._proxy_run_instance( + context, instance, request_spec, filter_properties, + requested_networks, injected_files, admin_password, + None, host, node, None, cascaded_ports) + + except (exception.InstanceNotFound, + exception.UnexpectedDeletingTaskStateError) as e: + with excutils.save_and_reraise_exception(): + self._notify_about_instance_usage(context, instance, + 'create.end', fault=e) + except exception.ComputeResourcesUnavailable as e: + LOG.debug(e.format_message(), instance=instance) + self._notify_about_instance_usage(context, instance, + 'create.error', fault=e) + raise exception.RescheduledException( + instance_uuid=instance.uuid, reason=e.format_message()) + except exception.BuildAbortException as e: + with excutils.save_and_reraise_exception(): + LOG.debug(e.format_message(), instance=instance) + self._notify_about_instance_usage(context, instance, + 'create.error', fault=e) + except (exception.FixedIpLimitExceeded, + exception.NoMoreNetworks) as e: + LOG.warn(_LW('No more network or fixed IP to be allocated'), + instance=instance) + self._notify_about_instance_usage(context, instance, + 'create.error', fault=e) + msg = _('Failed to allocate the network(s) with error %s, ' + 'not rescheduling.') % e.format_message() + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=msg) + except (exception.VirtualInterfaceCreateException, + exception.VirtualInterfaceMacAddressException) as e: + LOG.exception(_LE('Failed to allocate network(s)'), + instance=instance) + self._notify_about_instance_usage(context, instance, + 'create.error', fault=e) + msg = _('Failed to allocate the network(s), not rescheduling.') + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=msg) + except (exception.FlavorDiskTooSmall, + exception.FlavorMemoryTooSmall, + exception.ImageNotActive, + exception.ImageUnacceptable) as e: + self._notify_about_instance_usage(context, instance, + 'create.error', fault=e) + raise exception.BuildAbortException(instance_uuid=instance.uuid, + reason=e.format_message()) + except Exception as e: + self._notify_about_instance_usage(context, instance, + 'create.error', fault=e) + raise exception.RescheduledException( + instance_uuid=instance.uuid, reason=six.text_type(e)) + + def _shutdown_instance(self, context, instance, bdms, + requested_networks=None, notify=True, + try_deallocate_networks=True): + LOG.debug('Proxy stop instance') + + # proxy new function below + + def _heal_proxy_networks(self, context, instance, network_info): + pass + + def _heal_proxy_ports(self, context, instance, network_info): + return [] + + def _proxy_run_instance(self, context, instance, request_spec=None, + filter_properties=None, requested_networks=None, + injected_files=None, admin_password=None, + is_first_time=False, host=None, node=None, + legacy_bdm_in_spec=True, physical_ports=None): + LOG.debug('Proxy run instance') diff --git a/tricircle/proxy/service.py b/tricircle/proxy/service.py new file mode 100644 index 00000000..0e4598aa --- /dev/null +++ b/tricircle/proxy/service.py @@ -0,0 +1,42 @@ +# Copyright 2015 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from oslo_config.cfg import CONF + +import tricircle.common.service as t_service +from tricircle.common.utils import get_import_path +from tricircle.proxy.compute_manager import ProxyComputeManager + +_REPORT_INTERVAL = 30 +_REPORT_INTERVAL_MAX = 60 + + +def setup_server(): + service = t_service.NovaService( + host=CONF.host, + # NOTE(zhiyuan) binary needs to start with "nova-" + # if nova service is used + binary="nova-proxy", + topic="proxy", + db_allowed=False, + periodic_enable=True, + report_interval=_REPORT_INTERVAL, + periodic_interval_max=_REPORT_INTERVAL_MAX, + manager=get_import_path(ProxyComputeManager), + ) + + t_service.fix_compute_service_exchange(service) + + return service