Added changes to make errors and recovery for volumes more graceful:

* If an error occurs while creating or exporting a volume, volume['status'] is set to 'error' rather than being left in 'creating'.  Similarly, if an error occurs while deleting, volume['status'] is set to 'error_deleting'.
* Gracefully handle condition where iscsi export has already been removed, or has never successfully been created, in ISCSIDriver.remove_export
* In ISCSIDriver.ensure_export, gracefully handle the situation where an iscsi_target is None.  This occurs when an error while creating a volume prevents the target from being assigned.
* Add nova-manage volume commands: delete and reattach
** delete can be used to delete volumes that are in error state, since users can only delete volumes flagged as 'available'
** reattach can be used after host reboot to reattach volumes 

Note that I didn't implement rollback as the original bug suggested, but this code accomplishes the same objectives IMO by making it more clear when errors occur and providing tools for dealing with certain common error conditions.
This commit is contained in:
Anthony Young
2011-01-20 01:33:58 +00:00
committed by Tarmac

View File

@@ -79,7 +79,9 @@ from nova import exception
from nova import flags
from nova import log as logging
from nova import quota
from nova import rpc
from nova import utils
from nova.api.ec2.cloud import ec2_id_to_id
from nova.auth import manager
from nova.cloudpipe import pipelib
from nova.db import migration
@@ -95,6 +97,16 @@ flags.DECLARE('vpn_start', 'nova.network.manager')
flags.DECLARE('fixed_range_v6', 'nova.network.manager')
def param2id(object_id):
    """Translate an id given on the command line into an internal id.

    Ids that contain a dash are passed to ec2_id_to_id; anything else
    is parsed directly with int().

    args: [object_id], e.g. 'vol-0000000a' or 'volume-0000000a' or '10'
    """
    looks_like_ec2_id = '-' in object_id
    return ec2_id_to_id(object_id) if looks_like_ec2_id else int(object_id)
class VpnCommands(object):
"""Class for managing VPNs."""
@@ -535,6 +547,46 @@ class DbCommands(object):
print migration.db_version()
class VolumeCommands(object):
    """Methods for dealing with a cloud in an odd state"""

    def delete(self, volume_id):
        """Delete a volume, bypassing the check that it
        must be available.

        A volume whose status is 'in-use' is left untouched.  A volume
        that has no host recorded is removed from the database directly;
        otherwise a delete_volume request is sent to the volume queue of
        the volume's host.
        args: volume_id"""
        ctxt = context.get_admin_context()
        volume = db.volume_get(ctxt, param2id(volume_id))
        host = volume['host']

        if not host:
            # The cast below is addressed by host (queue_get_for takes it
            # as an argument), so without one there is no volume service
            # to send the request to.  Drop the database record instead.
            print("Volume not yet assigned to host.")
            print("Deleting volume from database and skipping rpc.")
            db.volume_destroy(ctxt, volume['id'])
            return

        if volume['status'] == 'in-use':
            print("Volume is in-use.")
            print("Detach volume from instance and then try again.")
            return

        rpc.cast(ctxt,
                 db.queue_get_for(ctxt, FLAGS.volume_topic, host),
                 {"method": "delete_volume",
                  "args": {"volume_id": volume['id']}})

    def reattach(self, volume_id):
        """Re-attach a volume that has previously been attached
        to an instance.  Typically called after a compute host
        has been rebooted.

        Sends an attach_volume request, carrying the volume's recorded
        mountpoint, to the compute queue of the instance's host.
        args: volume_id"""
        ctxt = context.get_admin_context()
        volume = db.volume_get(ctxt, param2id(volume_id))
        if not volume['instance_id']:
            print("volume is not attached to an instance")
            return

        instance = db.instance_get(ctxt, volume['instance_id'])
        host = instance['host']
        rpc.cast(ctxt,
                 db.queue_get_for(ctxt, FLAGS.compute_topic, host),
                 {"method": "attach_volume",
                  "args": {"instance_id": instance['id'],
                           "volume_id": volume['id'],
                           "mountpoint": volume['mountpoint']}})
CATEGORIES = [
('user', UserCommands),
('project', ProjectCommands),
@@ -545,7 +597,8 @@ CATEGORIES = [
('network', NetworkCommands),
('service', ServiceCommands),
('log', LogCommands),
('db', DbCommands)]
('db', DbCommands),
('volume', VolumeCommands)]
def lazy_match(name, key_value_tuples):