# cinder/cinder/volume/drivers/glusterfs.py

# Copyright (c) 2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import errno
import os
import stat
import time

from oslo.config import cfg

from cinder.brick.remotefs import remotefs as remotefs_brick
from cinder import compute
from cinder import db
from cinder import exception
from cinder.i18n import _
from cinder.image import image_utils
from cinder.openstack.common import fileutils
from cinder.openstack.common import log as logging
from cinder.openstack.common import processutils
from cinder.openstack.common import units
from cinder import utils
from cinder.volume.drivers import remotefs as remotefs_drv

LOG = logging.getLogger(__name__)

volume_opts = [
    cfg.StrOpt('glusterfs_shares_config',
               default='/etc/cinder/glusterfs_shares',
               help='File with the list of available Gluster shares.'),
    cfg.BoolOpt('glusterfs_sparsed_volumes',
                default=True,
                help=('Create volumes as sparsed files which take no '
                      'space. If set to False, a volume is created as a '
                      'regular file. In that case volume creation takes '
                      'a lot of time.')),
    cfg.BoolOpt('glusterfs_qcow2_volumes',
                default=False,
                help='Create volumes as QCOW2 files rather than raw '
                     'files.'),
    cfg.StrOpt('glusterfs_mount_point_base',
               default='$state_path/mnt',
               help='Base dir containing mount points for Gluster '
                    'shares.'),
]
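
# The shares config file lists one share per line, optionally followed by
# extra mount arguments that are appended verbatim to the mount command
# (see _mount_glusterfs below). Hostnames and volume names here are
# hypothetical:
#
#   192.168.1.10:/vol-a
#   gluster-host:/vol-b -o backup-volfile-servers=gluster-host2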
CONF = cfg.CONF
CONF.register_opts(volume_opts)
CONF.import_opt('volume_name_template', 'cinder.db')
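
# A minimal cinder.conf sketch for enabling this driver (values are
# illustrative, not defaults enforced here):
#
#   [DEFAULT]
#   volume_driver = cinder.volume.drivers.glusterfs.GlusterfsDriver
#   glusterfs_shares_config = /etc/cinder/glusterfs_shares
#   glusterfs_sparsed_volumes = True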


class GlusterfsDriver(remotefs_drv.RemoteFSSnapDriver):
    """GlusterFS-based cinder driver.

    Creates a file on a GlusterFS share and uses it as a block device
    on the hypervisor.

    Operations such as create/delete/extend volume/snapshot use locking
    on a per-process basis to prevent multiple threads from modifying
    qcow2 chains or the snapshot .info file simultaneously.
    """
driver_volume_type = 'glusterfs'
driver_prefix = 'glusterfs'
volume_backend_name = 'GlusterFS'
VERSION = '1.2.0'
def __init__(self, execute=processutils.execute, *args, **kwargs):
self._remotefsclient = None
super(GlusterfsDriver, self).__init__(*args, **kwargs)
self.configuration.append_config_values(volume_opts)
self._nova = None
self.base = getattr(self.configuration,
'glusterfs_mount_point_base',
CONF.glusterfs_mount_point_base)
self._remotefsclient = remotefs_brick.RemoteFsClient(
'glusterfs',
execute,
glusterfs_mount_point_base=self.base)
def set_execute(self, execute):
super(GlusterfsDriver, self).set_execute(execute)
if self._remotefsclient:
self._remotefsclient.set_execute(execute)
def do_setup(self, context):
"""Any initialization the volume driver does while starting."""
super(GlusterfsDriver, self).do_setup(context)
self._nova = compute.API()
config = self.configuration.glusterfs_shares_config
if not config:
msg = (_("There's no Gluster config file configured (%s)") %
'glusterfs_shares_config')
LOG.warn(msg)
raise exception.GlusterfsException(msg)
if not os.path.exists(config):
msg = (_("Gluster config file at %(config)s doesn't exist") %
{'config': config})
LOG.warn(msg)
raise exception.GlusterfsException(msg)
self.shares = {}
try:
self._execute('mount.glusterfs', check_exit_code=False)
except OSError as exc:
if exc.errno == errno.ENOENT:
raise exception.GlusterfsException(
_('mount.glusterfs is not installed'))
else:
raise
self._refresh_mounts()
def _unmount_shares(self):
self._load_shares_config(self.configuration.glusterfs_shares_config)
for share in self.shares.keys():
try:
self._do_umount(True, share)
except Exception as exc:
                LOG.warning(_('Exception during unmounting: %s') % exc)
def _do_umount(self, ignore_not_mounted, share):
mount_path = self._get_mount_point_for_share(share)
command = ['umount', mount_path]
try:
self._execute(*command, run_as_root=True)
except processutils.ProcessExecutionError as exc:
if ignore_not_mounted and 'not mounted' in exc.stderr:
LOG.info(_("%s is already umounted"), share)
else:
LOG.error(_("Failed to umount %(share)s, reason=%(stderr)s"),
{'share': share, 'stderr': exc.stderr})
raise
def _refresh_mounts(self):
try:
self._unmount_shares()
except processutils.ProcessExecutionError as exc:
if 'target is busy' in exc.stderr:
LOG.warn(_("Failed to refresh mounts, reason=%s") %
exc.stderr)
else:
raise
self._ensure_shares_mounted()
def _qemu_img_info(self, path, volume_name):
return super(GlusterfsDriver, self)._qemu_img_info_base(
path, volume_name, self.configuration.glusterfs_mount_point_base)
def check_for_setup_error(self):
"""Just to override parent behavior."""
pass
def _local_volume_dir(self, volume):
hashed = self._get_hash_str(volume['provider_location'])
path = '%s/%s' % (self.configuration.glusterfs_mount_point_base,
hashed)
return path
@utils.synchronized('glusterfs', external=False)
def create_cloned_volume(self, volume, src_vref):
"""Creates a clone of the specified volume."""
self._create_cloned_volume(volume, src_vref)
@utils.synchronized('glusterfs', external=False)
def create_volume(self, volume):
"""Creates a volume."""
self._ensure_shares_mounted()
volume['provider_location'] = self._find_share(volume['size'])
        LOG.info(_('Volume will be created on share %s') %
                 volume['provider_location'])
self._do_create_volume(volume)
return {'provider_location': volume['provider_location']}
@utils.synchronized('glusterfs', external=False)
def create_volume_from_snapshot(self, volume, snapshot):
self._create_volume_from_snapshot(volume, snapshot)
def _copy_volume_from_snapshot(self, snapshot, volume, volume_size):
"""Copy data from snapshot to destination volume.
This is done with a qemu-img convert to raw/qcow2 from the snapshot
qcow2.
"""
LOG.debug("snapshot: %(snap)s, volume: %(vol)s, "
"volume_size: %(size)s"
% {'snap': snapshot['id'],
'vol': volume['id'],
'size': volume_size})
info_path = self._local_path_volume_info(snapshot['volume'])
snap_info = self._read_info_file(info_path)
vol_path = self._local_volume_dir(snapshot['volume'])
forward_file = snap_info[snapshot['id']]
forward_path = os.path.join(vol_path, forward_file)
# Find the file which backs this file, which represents the point
# when this snapshot was created.
img_info = self._qemu_img_info(forward_path,
snapshot['volume']['name'])
path_to_snap_img = os.path.join(vol_path, img_info.backing_file)
path_to_new_vol = self._local_path_volume(volume)
LOG.debug("will copy from snapshot at %s" % path_to_snap_img)
if self.configuration.glusterfs_qcow2_volumes:
out_format = 'qcow2'
else:
out_format = 'raw'
image_utils.convert_image(path_to_snap_img,
path_to_new_vol,
out_format)
self._set_rw_permissions_for_all(path_to_new_vol)
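
    # For reference, a qcow2 chain for a volume with two snapshots looks
    # roughly like this (file names are illustrative):
    #
    #   volume-<id>  <-  volume-<id>.snap-a  <-  volume-<id>.snap-b (active)
    #
    # Each file's backing_file points at the image to its left, so the
    # backing file of the file created *for* a snapshot captures the
    # volume's state at the moment that snapshot was taken.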
@utils.synchronized('glusterfs', external=False)
def delete_volume(self, volume):
"""Deletes a logical volume."""
if not volume['provider_location']:
LOG.warn(_('Volume %s does not have provider_location specified, '
'skipping'), volume['name'])
return
self._ensure_share_mounted(volume['provider_location'])
volume_dir = self._local_volume_dir(volume)
mounted_path = os.path.join(volume_dir,
self.get_active_image_from_info(volume))
self._execute('rm', '-f', mounted_path, run_as_root=True)
# If an exception (e.g. timeout) occurred during delete_snapshot, the
# base volume may linger around, so just delete it if it exists
base_volume_path = self._local_path_volume(volume)
fileutils.delete_if_exists(base_volume_path)
info_path = self._local_path_volume_info(volume)
fileutils.delete_if_exists(info_path)
@utils.synchronized('glusterfs', external=False)
def create_snapshot(self, snapshot):
"""Apply locking to the create snapshot operation."""
return self._create_snapshot(snapshot)
def _get_matching_backing_file(self, backing_chain, snapshot_file):
return next(f for f in backing_chain
if f.get('backing-filename', '') == snapshot_file)
@utils.synchronized('glusterfs', external=False)
def delete_snapshot(self, snapshot):
"""Apply locking to the delete snapshot operation."""
self._delete_snapshot(snapshot)
def _delete_snapshot_online(self, context, snapshot, info):
# Update info over the course of this method
# active file never changes
        info_path = self._local_path_volume_info(snapshot['volume'])
snap_info = self._read_info_file(info_path)
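
        # Two merge directions are possible (see the qcow2 chain sketch
        # above); which applies depends on whether the snapshot being
        # deleted is backed by the active file:
        #
        #   rebase:  base <- snapshot_file (active): pull base's data up
        #            into the active file, then remove base.
        #   commit:  base <- snapshot_file <- active: merge snapshot_file
        #            down into base, then remove snapshot_file.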
if info['active_file'] == info['snapshot_file']:
# blockRebase/Pull base into active
# info['base'] => snapshot_file
file_to_delete = info['base_file']
if info['base_id'] is None:
# Passing base=none to blockRebase ensures that
# libvirt blanks out the qcow2 backing file pointer
new_base = None
else:
new_base = info['new_base_file']
snap_info[info['base_id']] = info['snapshot_file']
delete_info = {'file_to_merge': new_base,
'merge_target_file': None, # current
'type': 'qcow2',
'volume_id': snapshot['volume']['id']}
            del snap_info[snapshot['id']]
else:
# blockCommit snapshot into base
# info['base'] <= snapshot_file
# delete record of snapshot
file_to_delete = info['snapshot_file']
delete_info = {'file_to_merge': info['snapshot_file'],
'merge_target_file': info['base_file'],
'type': 'qcow2',
'volume_id': snapshot['volume']['id']}
            del snap_info[snapshot['id']]
try:
self._nova.delete_volume_snapshot(
context,
snapshot['id'],
delete_info)
        except Exception as e:
            LOG.error(_('Call to Nova delete snapshot failed'))
            LOG.exception(e)
            # Bare raise preserves the original traceback
            raise
# Loop and wait for result
# Nova will call Cinderclient to update the status in the database
# An update of progress = '90%' means that Nova is done
seconds_elapsed = 0
increment = 1
timeout = 7200
while True:
s = db.snapshot_get(context, snapshot['id'])
if s['status'] == 'deleting':
if s['progress'] == '90%':
# Nova tasks completed successfully
break
else:
msg = ('status of snapshot %s is '
'still "deleting"... waiting') % snapshot['id']
LOG.debug(msg)
time.sleep(increment)
seconds_elapsed += increment
else:
msg = _('Unable to delete snapshot %(id)s, '
'status: %(status)s.') % {'id': snapshot['id'],
'status': s['status']}
raise exception.GlusterfsException(msg)
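            # Back off the polling interval as the wait grows: 1s for the
            # first 10s elapsed, then 2s up to 20s, 5s up to 60s, and
            # 10s thereafter.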
if 10 < seconds_elapsed <= 20:
increment = 2
elif 20 < seconds_elapsed <= 60:
increment = 5
elif 60 < seconds_elapsed:
increment = 10
if seconds_elapsed > timeout:
msg = _('Timed out while waiting for Nova update '
'for deletion of snapshot %(id)s.') %\
{'id': snapshot['id']}
raise exception.GlusterfsException(msg)
# Write info file updated above
self._write_info_file(info_path, snap_info)
# Delete stale file
path_to_delete = os.path.join(
self._local_volume_dir(snapshot['volume']), file_to_delete)
self._execute('rm', '-f', path_to_delete, run_as_root=True)
def ensure_export(self, ctx, volume):
"""Synchronously recreates an export for a logical volume."""
self._ensure_share_mounted(volume['provider_location'])
def create_export(self, ctx, volume):
"""Exports the volume."""
pass
def remove_export(self, ctx, volume):
"""Removes an export for a logical volume."""
pass
def validate_connector(self, connector):
pass
@utils.synchronized('glusterfs', external=False)
def initialize_connection(self, volume, connector):
"""Allow connection to connector and return connection info."""
# Find active qcow2 file
active_file = self.get_active_image_from_info(volume)
path = '%s/%s/%s' % (self.configuration.glusterfs_mount_point_base,
self._get_hash_str(volume['provider_location']),
active_file)
data = {'export': volume['provider_location'],
'name': active_file}
if volume['provider_location'] in self.shares:
data['options'] = self.shares[volume['provider_location']]
# Test file for raw vs. qcow2 format
info = self._qemu_img_info(path, volume['name'])
data['format'] = info.file_format
if data['format'] not in ['raw', 'qcow2']:
msg = _('%s must be a valid raw or qcow2 image.') % path
raise exception.InvalidVolume(msg)
return {
'driver_volume_type': 'glusterfs',
'data': data,
'mount_point_base': self._get_mount_point_base()
}
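
    # The returned connection info looks roughly like this (values are
    # illustrative):
    #
    #   {'driver_volume_type': 'glusterfs',
    #    'data': {'export': 'gluster-host:/vol-a',
    #             'name': 'volume-<id>',
    #             'format': 'raw'},
    #    'mount_point_base': '/var/lib/cinder/mnt'}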
def terminate_connection(self, volume, connector, **kwargs):
"""Disallow connection from connector."""
pass
@utils.synchronized('glusterfs', external=False)
def copy_volume_to_image(self, context, volume, image_service, image_meta):
self._copy_volume_to_image(context, volume, image_service,
image_meta)
@utils.synchronized('glusterfs', external=False)
def extend_volume(self, volume, size_gb):
volume_path = self.local_path(volume)
volume_filename = os.path.basename(volume_path)
# Ensure no snapshots exist for the volume
active_image = self.get_active_image_from_info(volume)
if volume_filename != active_image:
msg = _('Extend volume is only supported for this'
' driver when no snapshots exist.')
raise exception.InvalidVolume(msg)
info = self._qemu_img_info(volume_path, volume['name'])
backing_fmt = info.file_format
if backing_fmt not in ['raw', 'qcow2']:
msg = _('Unrecognized backing format: %s')
raise exception.InvalidVolume(msg % backing_fmt)
# qemu-img can resize both raw and qcow2 files
image_utils.resize_image(volume_path, size_gb)
def _do_create_volume(self, volume):
"""Create a volume on given glusterfs_share.
:param volume: volume reference
"""
volume_path = self.local_path(volume)
volume_size = volume['size']
LOG.debug("creating new volume at %s" % volume_path)
if os.path.exists(volume_path):
msg = _('file already exists at %s') % volume_path
LOG.error(msg)
raise exception.InvalidVolume(reason=msg)
if self.configuration.glusterfs_qcow2_volumes:
self._create_qcow2_file(volume_path, volume_size)
else:
if self.configuration.glusterfs_sparsed_volumes:
self._create_sparsed_file(volume_path, volume_size)
else:
self._create_regular_file(volume_path, volume_size)
self._set_rw_permissions_for_all(volume_path)
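
    # The three creation paths above are inherited from the RemoteFS base
    # driver. Typically (an implementation detail of the base class, not
    # guaranteed here) sparse files are created with 'truncate -s',
    # regular files are zero-filled with 'dd', and qcow2 files with
    # 'qemu-img create'.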
def _ensure_shares_mounted(self):
"""Mount all configured GlusterFS shares."""
self._mounted_shares = []
self._load_shares_config(self.configuration.glusterfs_shares_config)
for share in self.shares.keys():
try:
self._ensure_share_mounted(share)
self._mounted_shares.append(share)
except Exception as exc:
                LOG.error(_('Exception during mounting: %s') % (exc,))
LOG.debug('Available shares: %s' % self._mounted_shares)
def _ensure_share_mounted(self, glusterfs_share):
"""Mount GlusterFS share.
:param glusterfs_share: string
"""
mount_path = self._get_mount_point_for_share(glusterfs_share)
self._mount_glusterfs(glusterfs_share, mount_path, ensure=True)
# Ensure we can write to this share
group_id = os.getegid()
current_group_id = utils.get_file_gid(mount_path)
current_mode = utils.get_file_mode(mount_path)
if group_id != current_group_id:
cmd = ['chgrp', group_id, mount_path]
self._execute(*cmd, run_as_root=True)
if not (current_mode & stat.S_IWGRP):
cmd = ['chmod', 'g+w', mount_path]
self._execute(*cmd, run_as_root=True)
self._ensure_share_writable(mount_path)
    def _find_share(self, volume_size_for):
        """Choose a GlusterFS share for a volume of the given size.

        The current implementation picks the mounted share with the
        greatest available capacity.

        :param volume_size_for: int size in GB
        """
if not self._mounted_shares:
raise exception.GlusterfsNoSharesMounted()
greatest_size = 0
greatest_share = None
for glusterfs_share in self._mounted_shares:
capacity = self._get_available_capacity(glusterfs_share)[0]
if capacity > greatest_size:
greatest_share = glusterfs_share
greatest_size = capacity
if volume_size_for * units.Gi > greatest_size:
raise exception.GlusterfsNoSuitableShareFound(
volume_size=volume_size_for)
return greatest_share
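
    # For example, with share A reporting 10 GiB free and share B
    # reporting 30 GiB free, a 20 GB volume lands on B, while a 40 GB
    # request raises GlusterfsNoSuitableShareFound. Capacity is checked
    # only at creation time; no space is reserved.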
def _mount_glusterfs(self, glusterfs_share, mount_path, ensure=False):
"""Mount GlusterFS share to mount path."""
# TODO(eharney): make this fs-agnostic and factor into remotefs
self._execute('mkdir', '-p', mount_path)
command = ['mount', '-t', 'glusterfs', glusterfs_share,
mount_path]
if self.shares.get(glusterfs_share) is not None:
command.extend(self.shares[glusterfs_share].split())
self._do_mount(command, ensure, glusterfs_share)
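
    # The resulting command looks like, e.g. (share name hypothetical):
    #
    #   mount -t glusterfs gluster-host:/vol-a \
    #       /var/lib/cinder/mnt/<hash> [extra args from the shares file]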
def backup_volume(self, context, backup, backup_service):
"""Create a new backup from an existing volume.
Allow a backup to occur only if no snapshots exist.
Check both Cinder and the file on-disk. The latter is only
a safety mechanism to prevent further damage if the snapshot
information is already inconsistent.
"""
snapshots = self.db.snapshot_get_all_for_volume(context,
backup['volume_id'])
snap_error_msg = _('Backup is not supported for GlusterFS '
'volumes with snapshots.')
if len(snapshots) > 0:
raise exception.InvalidVolume(snap_error_msg)
volume = self.db.volume_get(context, backup['volume_id'])
volume_dir = self._local_volume_dir(volume)
active_file_path = os.path.join(
volume_dir,
self.get_active_image_from_info(volume))
info = self._qemu_img_info(active_file_path, volume['name'])
if info.backing_file is not None:
msg = _('No snapshots found in database, but '
'%(path)s has backing file '
'%(backing_file)s!') % {'path': active_file_path,
'backing_file': info.backing_file}
LOG.error(msg)
raise exception.InvalidVolume(snap_error_msg)
if info.file_format != 'raw':
msg = _('Backup is only supported for raw-formatted '
'GlusterFS volumes.')
raise exception.InvalidVolume(msg)
return super(GlusterfsDriver, self).backup_volume(
context, backup, backup_service)
def _create_snapshot_online(self, snapshot, backing_filename,
new_snap_path):
# Perform online snapshot via Nova
context = snapshot['context']
self._do_create_snapshot(snapshot,
backing_filename,
new_snap_path)
connection_info = {
'type': 'qcow2',
'new_file': os.path.basename(new_snap_path),
'snapshot_id': snapshot['id']
}
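
        # Online snapshot protocol: Cinder creates the new qcow2 overlay
        # file, then asks Nova to switch the running instance onto it
        # (Nova drives libvirt, since only it can safely modify a live
        # domain). Nova reports progress back through the Cinder API;
        # progress '90%' is the agreed-upon signal that Nova is done.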
try:
result = self._nova.create_volume_snapshot(
context,
snapshot['volume_id'],
connection_info)
LOG.debug('nova call result: %s' % result)
        except Exception as e:
            LOG.error(_('Call to Nova to create snapshot failed'))
            LOG.exception(e)
            # Bare raise preserves the original traceback
            raise
# Loop and wait for result
# Nova will call Cinderclient to update the status in the database
# An update of progress = '90%' means that Nova is done
seconds_elapsed = 0
increment = 1
timeout = 600
        while True:
            s = db.snapshot_get(context, snapshot['id'])

            if s['status'] == 'creating':
                if s['progress'] == '90%':
                    # Nova tasks completed successfully
                    break
            elif s['status'] == 'error':
                msg = _('Nova returned "error" status '
                        'while creating snapshot.')
                raise exception.RemoteFSException(msg)

            LOG.debug('Status of snapshot %(id)s is now %(status)s' % {
                'id': snapshot['id'],
                'status': s['status']
            })
            # Sleeping unconditionally here avoids a busy-wait if Nova
            # reports an unexpected status value.
            time.sleep(increment)
            seconds_elapsed += increment
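            # Back off the polling interval as the wait grows: 1s for the
            # first 10s elapsed, then 2s up to 20s, 5s up to 60s, and
            # 10s thereafter.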
if 10 < seconds_elapsed <= 20:
increment = 2
elif 20 < seconds_elapsed <= 60:
increment = 5
elif 60 < seconds_elapsed:
increment = 10
if seconds_elapsed > timeout:
msg = _('Timed out while waiting for Nova update '
'for creation of snapshot %s.') % snapshot['id']
raise exception.RemoteFSException(msg)