# cinder/cinder/volume/drivers/glusterfs.py

# Copyright (c) 2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import errno
import os
import stat
import time

from oslo.config import cfg

from cinder.brick.remotefs import remotefs as remotefs_brick
from cinder import compute
from cinder import db
from cinder import exception
from cinder.i18n import _
from cinder.image import image_utils
from cinder.openstack.common import fileutils
from cinder.openstack.common import log as logging
from cinder.openstack.common import processutils
from cinder.openstack.common import units
from cinder import utils
from cinder.volume.drivers import remotefs as remotefs_drv

LOG = logging.getLogger(__name__)

volume_opts = [
    cfg.StrOpt('glusterfs_shares_config',
               default='/etc/cinder/glusterfs_shares',
               help='File with the list of available Gluster shares.'),
    cfg.BoolOpt('glusterfs_sparsed_volumes',
                default=True,
                help=('Create volumes as sparsed files which take no '
                      'space. If set to False, a volume is created as a '
                      'regular file. In that case volume creation takes '
                      'a lot of time.')),
    cfg.BoolOpt('glusterfs_qcow2_volumes',
                default=False,
                help='Create volumes as QCOW2 files rather than raw '
                     'files.'),
    cfg.StrOpt('glusterfs_mount_point_base',
               default='$state_path/mnt',
               help='Base dir containing mount points for Gluster '
                    'shares.'),
]
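
# The shares config file lists one share per line, optionally followed by
# extra mount arguments that are appended verbatim to the mount command
# (see _mount_glusterfs below). Hostnames and volume names here are
# hypothetical:
#
#   192.168.1.10:/vol-a
#   gluster-host:/vol-b -o backup-volfile-servers=gluster-host2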
CONF = cfg.CONF
CONF.register_opts(volume_opts)
CONF.import_opt('volume_name_template', 'cinder.db')
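
# A minimal cinder.conf sketch for enabling this driver (values are
# illustrative, not defaults enforced here):
#
#   [DEFAULT]
#   volume_driver = cinder.volume.drivers.glusterfs.GlusterfsDriver
#   glusterfs_shares_config = /etc/cinder/glusterfs_shares
#   glusterfs_sparsed_volumes = True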


class GlusterfsDriver(remotefs_drv.RemoteFSSnapDriver):
    """GlusterFS-based cinder driver.

    Creates a file on a GlusterFS share and uses it as a block device
    on the hypervisor.

    Operations such as create/delete/extend volume/snapshot use locking
    on a per-process basis to prevent multiple threads from modifying
    qcow2 chains or the snapshot .info file simultaneously.
    """
driver_volume_type = 'glusterfs'
driver_prefix = 'glusterfs'
volume_backend_name = 'GlusterFS'
VERSION = '1.2.0'
def __init__(self, execute=processutils.execute, *args, **kwargs):
self._remotefsclient = None
super(GlusterfsDriver, self).__init__(*args, **kwargs)
self.configuration.append_config_values(volume_opts)
self._nova = None
self.base = getattr(self.configuration,
'glusterfs_mount_point_base',
CONF.glusterfs_mount_point_base)
self._remotefsclient = remotefs_brick.RemoteFsClient(
'glusterfs',
execute,
glusterfs_mount_point_base=self.base)
def set_execute(self, execute):
super(GlusterfsDriver, self).set_execute(execute)
if self._remotefsclient:
self._remotefsclient.set_execute(execute)
def do_setup(self, context):
"""Any initialization the volume driver does while starting."""
super(GlusterfsDriver, self).do_setup(context)
self._nova = compute.API()
config = self.configuration.glusterfs_shares_config
if not config:
msg = (_("There's no Gluster config file configured (%s)") %
'glusterfs_shares_config')
LOG.warn(msg)
raise exception.GlusterfsException(msg)
if not os.path.exists(config):
msg = (_("Gluster config file at %(config)s doesn't exist") %
{'config': config})
LOG.warn(msg)
raise exception.GlusterfsException(msg)
self.shares = {}
try:
self._execute('mount.glusterfs', check_exit_code=False)
except OSError as exc:
if exc.errno == errno.ENOENT:
raise exception.GlusterfsException(
_('mount.glusterfs is not installed'))
else:
raise
self._refresh_mounts()
def _unmount_shares(self):
self._load_shares_config(self.configuration.glusterfs_shares_config)
for share in self.shares.keys():
try:
self._do_umount(True, share)
except Exception as exc:
                LOG.warning(_('Exception during unmounting: %s') % exc)
def _do_umount(self, ignore_not_mounted, share):
mount_path = self._get_mount_point_for_share(share)
command = ['umount', mount_path]
try:
self._execute(*command, run_as_root=True)
except processutils.ProcessExecutionError as exc:
if ignore_not_mounted and 'not mounted' in exc.stderr:
LOG.info(_("%s is already umounted"), share)
else:
LOG.error(_("Failed to umount %(share)s, reason=%(stderr)s"),
{'share': share, 'stderr': exc.stderr})
raise
def _refresh_mounts(self):
try:
self._unmount_shares()
except processutils.ProcessExecutionError as exc:
if 'target is busy' in exc.stderr:
LOG.warn(_("Failed to refresh mounts, reason=%s") %
exc.stderr)
else:
raise
self._ensure_shares_mounted()
def _qemu_img_info(self, path, volume_name):
return super(GlusterfsDriver, self)._qemu_img_info_base(
path, volume_name, self.configuration.glusterfs_mount_point_base)
def check_for_setup_error(self):
"""Just to override parent behavior."""
pass
def _local_volume_dir(self, volume):
hashed = self._get_hash_str(volume['provider_location'])
path = '%s/%s' % (self.configuration.glusterfs_mount_point_base,
hashed)
return path
@utils.synchronized('glusterfs', external=False)
def create_cloned_volume(self, volume, src_vref):
"""Creates a clone of the specified volume."""
self._create_cloned_volume(volume, src_vref)
@utils.synchronized('glusterfs', external=False)
def create_volume(self, volume):
"""Creates a volume."""
self._ensure_shares_mounted()
volume['provider_location'] = self._find_share(volume['size'])
        LOG.info(_('Volume will be created on share %s') %
                 volume['provider_location'])
self._do_create_volume(volume)
return {'provider_location': volume['provider_location']}
@utils.synchronized('glusterfs', external=False)
def create_volume_from_snapshot(self, volume, snapshot):
self._create_volume_from_snapshot(volume, snapshot)
def _copy_volume_from_snapshot(self, snapshot, volume, volume_size):
"""Copy data from snapshot to destination volume.
This is done with a qemu-img convert to raw/qcow2 from the snapshot
qcow2.
"""
LOG.debug("snapshot: %(snap)s, volume: %(vol)s, "
"volume_size: %(size)s"
% {'snap': snapshot['id'],
'vol': volume['id'],
'size': volume_size})
info_path = self._local_path_volume_info(snapshot['volume'])
snap_info = self._read_info_file(info_path)
vol_path = self._local_volume_dir(snapshot['volume'])
forward_file = snap_info[snapshot['id']]
forward_path = os.path.join(vol_path, forward_file)
# Find the file which backs this file, which represents the point
# when this snapshot was created.
img_info = self._qemu_img_info(forward_path,
snapshot['volume']['name'])
path_to_snap_img = os.path.join(vol_path, img_info.backing_file)
path_to_new_vol = self._local_path_volume(volume)
LOG.debug("will copy from snapshot at %s" % path_to_snap_img)
if self.configuration.glusterfs_qcow2_volumes:
out_format = 'qcow2'
else:
out_format = 'raw'
image_utils.convert_image(path_to_snap_img,
path_to_new_vol,
out_format)
self._set_rw_permissions_for_all(path_to_new_vol)
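
    # For reference, a qcow2 chain for a volume with two snapshots looks
    # roughly like this (file names are illustrative):
    #
    #   volume-<id>  <-  volume-<id>.snap-a  <-  volume-<id>.snap-b (active)
    #
    # Each file's backing_file points at the image to its left, so the
    # backing file of the file created *for* a snapshot captures the
    # volume's state at the moment that snapshot was taken.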
@utils.synchronized('glusterfs', external=False)
def delete_volume(self, volume):
"""Deletes a logical volume."""
if not volume['provider_location']:
LOG.warn(_('Volume %s does not have provider_location specified, '
'skipping'), volume['name'])
return
self._ensure_share_mounted(volume['provider_location'])
volume_dir = self._local_volume_dir(volume)
mounted_path = os.path.join(volume_dir,
self.get_active_image_from_info(volume))
self._execute('rm', '-f', mounted_path, run_as_root=True)
# If an exception (e.g. timeout) occurred during delete_snapshot, the
# base volume may linger around, so just delete it if it exists
base_volume_path = self._local_path_volume(volume)
fileutils.delete_if_exists(base_volume_path)
info_path = self._local_path_volume_info(volume)
fileutils.delete_if_exists(info_path)
@utils.synchronized('glusterfs', external=False)
def create_snapshot(self, snapshot):
"""Apply locking to the create snapshot operation."""
return self._create_snapshot(snapshot)
def _get_matching_backing_file(self, backing_chain, snapshot_file):
return next(f for f in backing_chain
if f.get('backing-filename', '') == snapshot_file)
@utils.synchronized('glusterfs', external=False)
def delete_snapshot(self, snapshot):
"""Apply locking to the delete snapshot operation."""
self._delete_snapshot(snapshot)
def _delete_snapshot_online(self, context, snapshot, info):
# Update info over the course of this method
# active file never changes
        info_path = self._local_path_volume_info(snapshot['volume'])
snap_info = self._read_info_file(info_path)
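
        # Two merge directions are possible (see the qcow2 chain sketch
        # above); which applies depends on whether the snapshot being
        # deleted is backed by the active file:
        #
        #   rebase:  base <- snapshot_file (active): pull base's data up
        #            into the active file, then remove base.
        #   commit:  base <- snapshot_file <- active: merge snapshot_file
        #            down into base, then remove snapshot_file.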
if info['active_file'] == info['snapshot_file']:
# blockRebase/Pull base into active
# info['base'] => snapshot_file
file_to_delete = info['base_file']
if info['base_id'] is None:
# Passing base=none to blockRebase ensures that
# libvirt blanks out the qcow2 backing file pointer
new_base = None
else:
new_base = info['new_base_file']
snap_info[info['base_id']] = info['snapshot_file']
delete_info = {'file_to_merge': new_base,
'merge_target_file': None, # current
'type': 'qcow2',
'volume_id': snapshot['volume']['id']}
            del snap_info[snapshot['id']]
else:
# blockCommit snapshot into base
# info['base'] <= snapshot_file
# delete record of snapshot
file_to_delete = info['snapshot_file']
delete_info = {'file_to_merge': info['snapshot_file'],
'merge_target_file': info['base_file'],
'type': 'qcow2',
'volume_id': snapshot['volume']['id']}
            del snap_info[snapshot['id']]
try:
self._nova.delete_volume_snapshot(
context,
snapshot['id'],
delete_info)
        except Exception as e:
            LOG.error(_('Call to Nova delete snapshot failed'))
            LOG.exception(e)
            # Bare raise preserves the original traceback
            raise
# Loop and wait for result
# Nova will call Cinderclient to update the status in the database
# An update of progress = '90%' means that Nova is done
seconds_elapsed = 0
increment = 1
timeout = 7200
while True:
s = db.snapshot_get(context, snapshot['id'])
if s['status'] == 'deleting':
if s['progress'] == '90%':
# Nova tasks completed successfully
break
else:
msg = ('status of snapshot %s is '
'still "deleting"... waiting') % snapshot['id']
LOG.debug(msg)
time.sleep(increment)
seconds_elapsed += increment
else:
msg = _('Unable to delete snapshot %(id)s, '
'status: %(status)s.') % {'id': snapshot['id'],
'status': s['status']}
raise exception.GlusterfsException(msg)
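            # Back off the polling interval as the wait grows: 1s for the
            # first 10s elapsed, then 2s up to 20s, 5s up to 60s, and
            # 10s thereafter.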
if 10 < seconds_elapsed <= 20:
increment = 2
elif 20 < seconds_elapsed <= 60:
increment = 5
elif 60 < seconds_elapsed:
increment = 10
if seconds_elapsed > timeout:
msg = _('Timed out while waiting for Nova update '
'for deletion of snapshot %(id)s.') %\
{'id': snapshot['id']}
raise exception.GlusterfsException(msg)
# Write info file updated above
self._write_info_file(info_path, snap_info)
# Delete stale file
path_to_delete = os.path.join(
self._local_volume_dir(snapshot['volume']), file_to_delete)
self._execute('rm', '-f', path_to_delete, run_as_root=True)
def ensure_export(self, ctx, volume):
"""Synchronously recreates an export for a logical volume."""
self._ensure_share_mounted(volume['provider_location'])
def create_export(self, ctx, volume):
"""Exports the volume."""
pass
def remove_export(self, ctx, volume):
"""Removes an export for a logical volume."""
pass
def validate_connector(self, connector):
pass
@utils.synchronized('glusterfs', external=False)
def initialize_connection(self, volume, connector):
"""Allow connection to connector and return connection info."""
# Find active qcow2 file
active_file = self.get_active_image_from_info(volume)
path = '%s/%s/%s' % (self.configuration.glusterfs_mount_point_base,
self._get_hash_str(volume['provider_location']),
active_file)
data = {'export': volume['provider_location'],
'name': active_file}
if volume['provider_location'] in self.shares:
data['options'] = self.shares[volume['provider_location']]
# Test file for raw vs. qcow2 format
info = self._qemu_img_info(path, volume['name'])
data['format'] = info.file_format
if data['format'] not in ['raw', 'qcow2']:
msg = _('%s must be a valid raw or qcow2 image.') % path
raise exception.InvalidVolume(msg)
return {
'driver_volume_type': 'glusterfs',
'data': data,
'mount_point_base': self._get_mount_point_base()
}
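
    # The returned connection info looks roughly like this (values are
    # illustrative):
    #
    #   {'driver_volume_type': 'glusterfs',
    #    'data': {'export': 'gluster-host:/vol-a',
    #             'name': 'volume-<id>',
    #             'format': 'raw'},
    #    'mount_point_base': '/var/lib/cinder/mnt'}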
def terminate_connection(self, volume, connector, **kwargs):
"""Disallow connection from connector."""
pass
@utils.synchronized('glusterfs', external=False)
def copy_volume_to_image(self, context, volume, image_service, image_meta):
self._copy_volume_to_image(context, volume, image_service,
image_meta)
@utils.synchronized('glusterfs', external=False)
def extend_volume(self, volume, size_gb):
volume_path = self.local_path(volume)
volume_filename = os.path.basename(volume_path)
# Ensure no snapshots exist for the volume
active_image = self.get_active_image_from_info(volume)
if volume_filename != active_image:
msg = _('Extend volume is only supported for this'
' driver when no snapshots exist.')
raise exception.InvalidVolume(msg)
info = self._qemu_img_info(volume_path, volume['name'])
backing_fmt = info.file_format
if backing_fmt not in ['raw', 'qcow2']:
msg = _('Unrecognized backing format: %s')
raise exception.InvalidVolume(msg % backing_fmt)
# qemu-img can resize both raw and qcow2 files
image_utils.resize_image(volume_path, size_gb)
def _do_create_volume(self, volume):
"""Create a volume on given glusterfs_share.
:param volume: volume reference
"""
volume_path = self.local_path(volume)
volume_size = volume['size']
LOG.debug("creating new volume at %s" % volume_path)
if os.path.exists(volume_path):
msg = _('file already exists at %s') % volume_path
LOG.error(msg)
raise exception.InvalidVolume(reason=msg)
if self.configuration.glusterfs_qcow2_volumes:
self._create_qcow2_file(volume_path, volume_size)
else:
if self.configuration.glusterfs_sparsed_volumes:
self._create_sparsed_file(volume_path, volume_size)
else:
self._create_regular_file(volume_path, volume_size)
self._set_rw_permissions_for_all(volume_path)
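
    # The three creation paths above are inherited from the RemoteFS base
    # driver. Typically (an implementation detail of the base class, not
    # guaranteed here) sparse files are created with 'truncate -s',
    # regular files are zero-filled with 'dd', and qcow2 files with
    # 'qemu-img create'.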
def _ensure_shares_mounted(self):
"""Mount all configured GlusterFS shares."""
self._mounted_shares = []
self._load_shares_config(self.configuration.glusterfs_shares_config)
for share in self.shares.keys():
try:
self._ensure_share_mounted(share)
self._mounted_shares.append(share)
except Exception as exc:
                LOG.error(_('Exception during mounting: %s') % (exc,))
LOG.debug('Available shares: %s' % self._mounted_shares)
def _ensure_share_mounted(self, glusterfs_share):
"""Mount GlusterFS share.
:param glusterfs_share: string
"""
mount_path = self._get_mount_point_for_share(glusterfs_share)
self._mount_glusterfs(glusterfs_share, mount_path, ensure=True)
# Ensure we can write to this share
group_id = os.getegid()
current_group_id = utils.get_file_gid(mount_path)
current_mode = utils.get_file_mode(mount_path)
if group_id != current_group_id:
cmd = ['chgrp', group_id, mount_path]
self._execute(*cmd, run_as_root=True)
if not (current_mode & stat.S_IWGRP):
cmd = ['chmod', 'g+w', mount_path]
self._execute(*cmd, run_as_root=True)
self._ensure_share_writable(mount_path)
    def _find_share(self, volume_size_for):
        """Choose a GlusterFS share for a volume of the given size.

        The current implementation picks the mounted share with the
        greatest available capacity.

        :param volume_size_for: int size in GB
        """
if not self._mounted_shares:
raise exception.GlusterfsNoSharesMounted()
greatest_size = 0
greatest_share = None
for glusterfs_share in self._mounted_shares:
capacity = self._get_available_capacity(glusterfs_share)[0]
if capacity > greatest_size:
greatest_share = glusterfs_share
greatest_size = capacity
if volume_size_for * units.Gi > greatest_size:
raise exception.GlusterfsNoSuitableShareFound(
volume_size=volume_size_for)
return greatest_share
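
    # For example, with share A reporting 10 GiB free and share B
    # reporting 30 GiB free, a 20 GB volume lands on B, while a 40 GB
    # request raises GlusterfsNoSuitableShareFound. Capacity is checked
    # only at creation time; no space is reserved.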
def _mount_glusterfs(self, glusterfs_share, mount_path, ensure=False):
"""Mount GlusterFS share to mount path."""
# TODO(eharney): make this fs-agnostic and factor into remotefs
self._execute('mkdir', '-p', mount_path)
command = ['mount', '-t', 'glusterfs', glusterfs_share,
mount_path]
if self.shares.get(glusterfs_share) is not None:
command.extend(self.shares[glusterfs_share].split())
self._do_mount(command, ensure, glusterfs_share)
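
    # The resulting command looks like, e.g. (share name hypothetical):
    #
    #   mount -t glusterfs gluster-host:/vol-a \
    #       /var/lib/cinder/mnt/<hash> [extra args from the shares file]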
def backup_volume(self, context, backup, backup_service):
"""Create a new backup from an existing volume.
Allow a backup to occur only if no snapshots exist.
Check both Cinder and the file on-disk. The latter is only
a safety mechanism to prevent further damage if the snapshot
information is already inconsistent.
"""
snapshots = self.db.snapshot_get_all_for_volume(context,
backup['volume_id'])
snap_error_msg = _('Backup is not supported for GlusterFS '
'volumes with snapshots.')
if len(snapshots) > 0:
raise exception.InvalidVolume(snap_error_msg)
volume = self.db.volume_get(context, backup['volume_id'])
volume_dir = self._local_volume_dir(volume)
active_file_path = os.path.join(
volume_dir,
self.get_active_image_from_info(volume))
info = self._qemu_img_info(active_file_path, volume['name'])
if info.backing_file is not None:
msg = _('No snapshots found in database, but '
'%(path)s has backing file '
'%(backing_file)s!') % {'path': active_file_path,
'backing_file': info.backing_file}
LOG.error(msg)
raise exception.InvalidVolume(snap_error_msg)
if info.file_format != 'raw':
msg = _('Backup is only supported for raw-formatted '
'GlusterFS volumes.')
raise exception.InvalidVolume(msg)
return super(GlusterfsDriver, self).backup_volume(
context, backup, backup_service)
def _create_snapshot_online(self, snapshot, backing_filename,
new_snap_path):
# Perform online snapshot via Nova
context = snapshot['context']
self._do_create_snapshot(snapshot,
backing_filename,
new_snap_path)
connection_info = {
'type': 'qcow2',
'new_file': os.path.basename(new_snap_path),
'snapshot_id': snapshot['id']
}
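
        # Online snapshot protocol: Cinder creates the new qcow2 overlay
        # file, then asks Nova to switch the running instance onto it
        # (Nova drives libvirt, since only it can safely modify a live
        # domain). Nova reports progress back through the Cinder API;
        # progress '90%' is the agreed-upon signal that Nova is done.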
try:
result = self._nova.create_volume_snapshot(
context,
snapshot['volume_id'],
connection_info)
LOG.debug('nova call result: %s' % result)
        except Exception as e:
            LOG.error(_('Call to Nova to create snapshot failed'))
            LOG.exception(e)
            # Bare raise preserves the original traceback
            raise
# Loop and wait for result
# Nova will call Cinderclient to update the status in the database
# An update of progress = '90%' means that Nova is done
seconds_elapsed = 0
increment = 1
timeout = 600
        while True:
            s = db.snapshot_get(context, snapshot['id'])

            if s['status'] == 'creating':
                if s['progress'] == '90%':
                    # Nova tasks completed successfully
                    break
            elif s['status'] == 'error':
                msg = _('Nova returned "error" status '
                        'while creating snapshot.')
                raise exception.RemoteFSException(msg)

            LOG.debug('Status of snapshot %(id)s is now %(status)s' % {
                'id': snapshot['id'],
                'status': s['status']
            })
            # Sleeping unconditionally here avoids a busy-wait if Nova
            # reports an unexpected status value.
            time.sleep(increment)
            seconds_elapsed += increment
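            # Back off the polling interval as the wait grows: 1s for the
            # first 10s elapsed, then 2s up to 20s, 5s up to 60s, and
            # 10s thereafter.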
if 10 < seconds_elapsed <= 20:
increment = 2
elif 20 < seconds_elapsed <= 60:
increment = 5
elif 60 < seconds_elapsed:
increment = 10
if seconds_elapsed > timeout:
msg = _('Timed out while waiting for Nova update '
'for creation of snapshot %s.') % snapshot['id']
raise exception.RemoteFSException(msg)