cinder/cinder/volume/drivers/ibm/ibm_storage/xiv_replication.py

# Copyright (c) 2017 IBM Corporation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import six

from oslo_log import log as logging
from oslo_utils import importutils

pyxcli = importutils.try_import("pyxcli")
if pyxcli:
    from pyxcli import errors
    from pyxcli.mirroring import cg_recovery_manager
    from pyxcli.mirroring import errors as m_errors
    from pyxcli.mirroring import volume_recovery_manager

from cinder.i18n import _
from cinder.volume.drivers.ibm.ibm_storage import strings

SYNC = 'sync'
ASYNC = 'async'

LOG = logging.getLogger(__name__)

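
# A Rate pairs a supported async RPO value with the interval of the mirroring
# schedule used to meet it; the schedule name is derived from the interval
# (e.g. '00:01:00' becomes 'cinder_00_01_00', and the minimum interval
# '00:00:20' maps to 'min_interval').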
class Rate(object):

    def __init__(self, rpo, schedule):
        self.rpo = rpo
        self.schedule = schedule
        self.schedule_name = self._schedule_name_from_schedule(self.schedule)

    def _schedule_name_from_schedule(self, schedule):
        if schedule == '00:00:20':
            return 'min_interval'
        return ("cinder_%(sched)s" %
                {'sched': schedule.replace(':', '_')})
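

# Base class shared by volume-level and consistency-group replication; the
# subclasses below supply the matching pyxcli recovery manager and the
# mirror-creation call.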
class Replication(object):

    async_rates = (
        Rate(rpo=120, schedule='00:01:00'),
        Rate(rpo=300, schedule='00:02:00'),
        Rate(rpo=600, schedule='00:05:00'),
        Rate(rpo=1200, schedule='00:10:00'),
    )

    def __init__(self, proxy):
        self.proxy = proxy
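
    # Translate a supported RPO into the name of the schedule defined for it;
    # an RPO without a matching rate is logged as an error and yields None.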
    @staticmethod
    def get_schedule_from_rpo(rpo):
        # next() returns None instead of raising IndexError when no rate
        # matches, so the error branch below is reachable.
        rate = next((rate for rate in Replication.async_rates
                     if rate.rpo == rpo), None)
        schedule = rate.schedule_name if rate else None
        if schedule:
            LOG.debug('schedule %(sched)s: for rpo %(rpo)s',
                      {'sched': schedule, 'rpo': rpo})
        else:
            LOG.error('Failed to find schedule for rpo %(rpo)s',
                      {'rpo': rpo})
        return schedule
    @staticmethod
    def get_supported_rpo():
        return [rate.rpo for rate in Replication.async_rates]

    def get_recovery_mgr(self):
        # Recovery manager is set in derived classes
        raise NotImplementedError

    def get_remote_recovery_mgr(self):
        # Recovery manager is set in derived classes
        raise NotImplementedError

    def replication_create_mirror(self, resource, replication_info,
                                  target, pool):
        raise NotImplementedError
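
    # Parse replication settings out of type extra specs. Illustrative input
    # (hypothetical values):
    #     {'replication_enabled': '<is> True',
    #      'replication_type': '<is> async',
    #      'rpo': '<is> 600'}
    # yields ({'enabled': True, 'mode': 'async', 'rpo': 600}, "").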
    @staticmethod
    def extract_replication_info_from_specs(specs):
        info = {'enabled': False, 'mode': None, 'rpo': 0}
        msg = ""
        if specs:
            LOG.debug('extract_replication_info_from_specs: specs %(specs)s',
                      {'specs': specs})
            info['enabled'] = (
                specs.get('replication_enabled', '').upper() in
                (u'TRUE', strings.METADATA_IS_TRUE) or
                specs.get('group_replication_enabled', '').upper() in
                (u'TRUE', strings.METADATA_IS_TRUE))
            replication_type = specs.get('replication_type', SYNC).lower()
            if replication_type in (u'sync', u'<is> sync'):
                info['mode'] = SYNC
            elif replication_type in (u'async', u'<is> async'):
                info['mode'] = ASYNC
            else:
                msg = (_("Unsupported replication mode %(mode)s")
                       % {'mode': replication_type})
                return None, msg
            info['rpo'] = int(specs.get('rpo', u'<is> 0')[5:])
            supported_rpos = Replication.get_supported_rpo()
            if info['rpo'] and info['rpo'] not in supported_rpos:
                msg = (_("Unsupported replication RPO %(rpo)s")
                       % {'rpo': info['rpo']})
                return None, msg
            LOG.debug('extract_replication_info_from_specs: info %(info)s',
                      {'info': info})
        return info, msg
    def failover(self, resource, failback):
        raise NotImplementedError
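
    # Resolve the configured replication target and its pool
    # ('san_clustername') and create a mirror of the resource on it.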
    def create_replication(self, resource_name, replication_info):
        LOG.debug('Replication::create_replication replication_info %(rep)s',
                  {'rep': replication_info})
        target, params = self.proxy._get_replication_target_params()
        LOG.info('Target %(target)s: %(params)s',
                 {'target': target, 'params': six.text_type(params)})
        try:
            pool = params['san_clustername']
        except Exception:
            msg = (_("Missing pool information for target '%(target)s'") %
                   {'target': target})
            LOG.error(msg)
            raise self.proxy.meta['exception'].VolumeBackendAPIException(
                data=msg)
        self.replication_create_mirror(resource_name, replication_info,
                                       target, pool)
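
    # Deactivate the mirror and then delete it; each step raises
    # VolumeBackendAPIException so callers can surface the failure.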
    def delete_replication(self, resource_name, replication_info):
        LOG.debug('Replication::delete_replication replication_info %(rep)s',
                  {'rep': replication_info})
        recovery_mgr = self.get_recovery_mgr()
        try:
            recovery_mgr.deactivate_mirror(resource_id=resource_name)
        except Exception as e:
            details = self.proxy._get_code_and_status_or_message(e)
            msg = (_("Failed ending replication for %(resource)s: "
                     "'%(details)s'") % {'resource': resource_name,
                                         'details': details})
            LOG.error(msg)
            raise self.proxy.meta['exception'].VolumeBackendAPIException(
                data=msg)
        try:
            recovery_mgr.delete_mirror(resource_id=resource_name)
        except Exception as e:
            details = self.proxy._get_code_and_status_or_message(e)
            msg = (_("Failed deleting replica for %(resource)s: "
                     "'%(details)s'") % {'resource': resource_name,
                                         'details': details})
            LOG.error(msg)
            raise self.proxy.meta['exception'].VolumeBackendAPIException(
                data=msg)
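
    # Shared failover/failback flow: try switch_roles through the local
    # recovery manager first, then fall back to change_role on the remote
    # side. Returns a (success, failure_reason) tuple.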
    def _failover_resource(self, resource, recovery_mgr, failover_rep_mgr,
                           rep_type, failback):
        # check if mirror is defined and active
        LOG.debug('Check if mirroring is active on %(res)s',
                  {'res': resource['name']})
        try:
            active = recovery_mgr.is_mirror_active(
                resource_id=resource['name'])
        except Exception:
            active = False
        state = 'active' if active else 'inactive'
        LOG.debug('Mirroring is %(state)s', {'state': state})

        # In case of failback, mirroring must be active.
        # In case of failover we attempt to move in any condition.
        if failback and not active:
            msg = ("%(rep_type)s %(res)s: no active mirroring and can not "
                   "failback" % {'rep_type': rep_type,
                                 'res': resource['name']})
            LOG.error(msg)
            return False, msg

        try:
            if rep_type == 'cg':
                resource['name'] = self.proxy._cg_name_from_group(resource)
            recovery_mgr.switch_roles(resource_id=resource['name'])
            return True, None
        except Exception as e:
            # failed attempt to switch_roles from the master
            details = self.proxy._get_code_and_status_or_message(e)
            LOG.warning('Failed to perform switch_roles on'
                        ' %(res)s: %(err)s. '
                        'Continue to change_role',
                        {'res': resource['name'], 'err': details})

        try:
            # Ugly, brute-force stage: change the role directly.
            if failback:
                role = 'Slave'
            else:
                role = 'Master'
            LOG.warning('Attempt to change_role to %(role)s', {'role': role})
            failover_rep_mgr.change_role(resource_id=resource['name'],
                                         new_role=role)
            return True, None
        except m_errors.NoMirrorDefinedError as e:
            details = self.proxy._get_code_and_status_or_message(e)
            msg = ("%(rep_type)s %(res)s no replication defined: %(err)s" %
                   {'rep_type': rep_type, 'res': resource['name'],
                    'err': details})
            LOG.error(msg)
            return False, msg
        except Exception as e:
            details = self.proxy._get_code_and_status_or_message(e)
            msg = ('%(rep_type)s %(res)s change_role failed: %(err)s' %
                   {'rep_type': rep_type, 'res': resource['name'],
                    'err': details})
            LOG.error(msg)
            return False, msg
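

# Replication of a single volume, backed by pyxcli's VolumeRecoveryManager.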
class VolumeReplication(Replication):

    def __init__(self, proxy):
        super(VolumeReplication, self).__init__(proxy)

    def get_recovery_mgr(self):
        return volume_recovery_manager.VolumeRecoveryManager(
            False, self.proxy.ibm_storage_cli)

    def get_remote_recovery_mgr(self):
        return volume_recovery_manager.VolumeRecoveryManager(
            True, self.proxy.ibm_storage_remote_cli)
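
    # Create and activate the volume mirror; if the slave volume already
    # exists on the target (the volume is part of a mirrored CG), retry
    # without creating the slave.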
    def replication_create_mirror(self, resource_name, replication_info,
                                  target, pool):
        LOG.debug('VolumeReplication::replication_create_mirror')
        schedule = None
        if replication_info['rpo']:
            schedule = Replication.get_schedule_from_rpo(
                replication_info['rpo'])
        try:
            recovery_mgr = self.get_recovery_mgr()
            recovery_mgr.create_mirror(
                resource_name=resource_name,
                target_name=target,
                mirror_type=replication_info['mode'],
                slave_resource_name=resource_name,
                create_slave='yes',
                remote_pool=pool,
                rpo=replication_info['rpo'],
                schedule=schedule,
                activate_mirror='yes')
        except errors.RemoteVolumeExists:
            # if volume exists (same ID), don't create slave.
            # This only happens when the volume is part of a cg.
            recovery_mgr.create_mirror(
                resource_name=resource_name,
                target_name=target,
                mirror_type=replication_info['mode'],
                slave_resource_name=resource_name,
                create_slave='no',
                remote_pool=pool,
                rpo=replication_info['rpo'],
                schedule=schedule,
                activate_mirror='yes')
        except errors.VolumeMasterError:
            LOG.debug('Volume %(vol)s is already mirrored',
                      {'vol': resource_name})
        except Exception as e:
            details = self.proxy._get_code_and_status_or_message(e)
            msg = (_("Failed replication for %(resource)s: '%(details)s'") %
                   {'resource': resource_name, 'details': details})
            LOG.error(msg)
            raise self.proxy.meta['exception'].VolumeBackendAPIException(
                data=msg)
    def failover(self, resource, failback):
        """Failover a single volume.

        Attempts to failover a single volume.
        Sequence:
        1. attempt to switch roles from master
        2. attempt to change role to master on secondary

        returns (success, failure_reason)
        """
        LOG.debug("VolumeReplication::failover %(vol)s",
                  {'vol': resource['name']})
        recovery_mgr = self.get_recovery_mgr()
        remote_recovery_mgr = self.get_remote_recovery_mgr()
        return self._failover_resource(resource, recovery_mgr,
                                       remote_recovery_mgr, 'vol', failback)
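

# Replication of a consistency group, backed by pyxcli's CGRecoveryManager.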
class GroupReplication(Replication):

    def __init__(self, proxy):
        super(GroupReplication, self).__init__(proxy)

    def get_recovery_mgr(self):
        return cg_recovery_manager.CGRecoveryManager(
            False, self.proxy.ibm_storage_cli)

    def get_remote_recovery_mgr(self):
        return cg_recovery_manager.CGRecoveryManager(
            True, self.proxy.ibm_storage_remote_cli)
    def replication_create_mirror(self, resource_name, replication_info,
                                  target, pool):
        LOG.debug('GroupReplication::replication_create_mirror')
        schedule = None
        if replication_info['rpo']:
            schedule = Replication.get_schedule_from_rpo(
                replication_info['rpo'])
        try:
            recovery_mgr = self.get_recovery_mgr()
            recovery_mgr.create_mirror(
                resource_name=resource_name,
                target_name=target,
                mirror_type=replication_info['mode'],
                slave_resource_name=resource_name,
                rpo=replication_info['rpo'],
                schedule=schedule,
                activate_mirror='yes')
        except Exception as e:
            details = self.proxy._get_code_and_status_or_message(e)
            msg = (_("Failed replication for %(resource)s: '%(details)s'") %
                   {'resource': resource_name, 'details': details})
            LOG.error(msg)
            raise self.proxy.meta['exception'].VolumeBackendAPIException(
                data=msg)
    def failover(self, resource, failback):
        LOG.debug("GroupReplication::failover %(cg)s",
                  {'cg': resource['name']})
        recovery_mgr = self.get_recovery_mgr()
        remote_recovery_mgr = self.get_remote_recovery_mgr()
        return self._failover_resource(resource, recovery_mgr,
                                       remote_recovery_mgr, 'cg', failback)