Implement snapshot tracking in HDS HNAS driver

The HDS HNAS Manila driver currently does not track snapshot creation
status. This patch adds job status monitoring, so the snapshot
creation result reflects the status reported by the submitted job.

Partially implements bp hds-hnas

Change-Id: I20c692d19ea8afc6a14e9651c2af2c4de8cac553
This commit is contained in:
Tiago Pasqualini 2015-08-20 15:21:26 -03:00
parent 5fb872fed1
commit 9c2a81306e
3 changed files with 304 additions and 17 deletions

View File

@ -61,6 +61,10 @@ hds_hnas_opts = [
default=None,
help="The IP of the clusters admin node. Only set in HNAS "
"multinode clusters."),
cfg.IntOpt('hds_hnas_stalled_job_timeout',
default=30,
help="The time (in seconds) to wait for stalled HNAS jobs "
"before aborting."),
]
CONF = cfg.CONF
@ -95,6 +99,8 @@ class HDSHNASDriver(driver.ShareDriver):
cluster_admin_ip0 = self.configuration.safe_get(
'hds_hnas_cluster_admin_ip0')
self.private_storage = kwargs.get('private_storage')
job_timeout = self.configuration.safe_get(
'hds_hnas_stalled_job_timeout')
if hnas_evs_id is None:
msg = _("The config parameter hds_hnas_evs_id is not set.")
@ -122,7 +128,8 @@ class HDSHNASDriver(driver.ShareDriver):
self.hnas = ssh.HNASSSHBackend(hnas_ip, hnas_username, hnas_password,
ssh_private_key, cluster_admin_ip0,
hnas_evs_id, self.hnas_evs_ip, fs_name)
hnas_evs_id, self.hnas_evs_ip, fs_name,
job_timeout)
def allow_access(self, context, share, access, share_server=None):
"""Allow access to a share.

View File

@ -19,6 +19,8 @@ from oslo_utils import units
import paramiko
import six
import time
from manila import exception
from manila.i18n import _
from manila.i18n import _LE
@ -30,7 +32,7 @@ LOG = log.getLogger(__name__)
class HNASSSHBackend(object):
def __init__(self, hnas_ip, hnas_username, hnas_password, ssh_private_key,
cluster_admin_ip0, evs_id, evs_ip, fs_name):
cluster_admin_ip0, evs_id, evs_ip, fs_name, job_timeout):
self.ip = hnas_ip
self.port = 22
self.user = hnas_username
@ -41,6 +43,7 @@ class HNASSSHBackend(object):
self.fs_name = fs_name
self.evs_ip = evs_ip
self.sshpool = None
self.job_timeout = job_timeout
def get_stats(self):
"""Get the stats from file-system.
@ -253,16 +256,76 @@ class HNASSSHBackend(object):
:param share_id: ID of share for snapshot.
:param snapshot_id: ID of new snapshot.
"""
export = self._nfs_export_list(share_id)
saved_list = export[0].export_configuration
new_list = []
for access in saved_list:
new_list.append(access.replace('(rw)', '(ro)'))
self._update_access_rule(share_id, new_list)
src_path = '/shares/' + share_id
snap_path = '/snapshots/' + share_id + '/' + snapshot_id
try:
command = ['tree-clone-job-submit', '-e', '-f', self.fs_name,
src_path, snap_path]
output, err = self._execute(command)
if 'Request submitted successfully' in output:
LOG.debug("Request for creating snapshot submitted "
"successfully.")
job_submit = JobSubmit(output)
if job_submit.request_status == 'Request submitted successfully':
job_id = job_submit.job_id
job_status = None
progress = ''
job_rechecks = 0
starttime = time.time()
deadline = starttime + self.job_timeout
while not job_status or \
job_status.job_state != "Job was completed":
command = ['tree-clone-job-status', job_id]
output, err = self._execute(command)
job_status = JobStatus(output)
if job_status.job_state == 'Job failed':
break
old_progress = progress
progress = job_status.data_bytes_processed
if old_progress == progress:
job_rechecks += 1
now = time.time()
if now > deadline:
command = ['tree-clone-job-abort', job_id]
output, err = self._execute(command)
LOG.error(_LE("Timeout in snapshot %s creation.") %
snapshot_id)
msg = (_("Share snapshot %s was not created.")
% snapshot_id)
raise exception.HNASBackendException(msg=msg)
else:
time.sleep(job_rechecks ** 2)
else:
job_rechecks = 0
if (job_status.job_state, job_status.job_status,
job_status.directories_missing,
job_status.files_missing) == ("Job was completed",
"Success", '0', '0'):
LOG.debug("Snapshot %(snapshot_id)s from share "
"%(share_id)s created successfully.",
{'snapshot_id': snapshot_id,
'share_id': share_id})
else:
LOG.error(_LE('Error in snapshot %s creation.'),
snapshot_id)
msg = (_('Share snapshot %s was not created.') %
snapshot_id)
raise exception.HNASBackendException(msg=msg)
except processutils.ProcessExecutionError as e:
if ('Cannot find any clonable files in the source directory' in
e.stderr):
@ -274,6 +337,8 @@ class HNASSSHBackend(object):
msg = six.text_type(e)
LOG.exception(msg)
raise exception.HNASBackendException(msg=msg)
finally:
self._update_access_rule(share_id, saved_list)
def delete_snapshot(self, share_id, snapshot_id):
"""Deletes snapshot.
@ -705,4 +770,67 @@ class Export(object):
export_config = split_data[1].split('\n')
for i in range(0, len(export_config)):
if any(j.isdigit() or j.isalpha() for j in export_config[i]):
self.export_configuration.append(export_config[i])
self.export_configuration.append(export_config[i])
class JobStatus(object):
    """Parses the output of an HNAS 'tree-clone-job-status' command.

    The raw output is tokenized on whitespace with commas stripped.  The
    fixed header fields (job id, node, EVS, file system, paths, ...) are
    read by positional index, so this parser assumes the exact field
    layout emitted by HNAS; the variable trailer (job info, counters and
    skipping details) is located by scanning for keywords instead.
    """

    def __init__(self, data):
        """Extract the job attributes from raw command output.

        :param data: raw text returned by 'tree-clone-job-status'.  When
            empty or None, no attributes are set.
        """
        if data:
            # Strip commas so values like "104 symlinks," tokenize cleanly.
            split_data = data.replace(",", "").split()
            self.job_id = split_data[4]
            # NOTE(review): misspelled attribute kept for any existing
            # callers; prefer the correctly-spelled alias set just below.
            self.pysical_node = split_data[14]
            self.physical_node = self.pysical_node
            self.evs = split_data[17]
            self.volume_number = split_data[21]
            self.fs_id = split_data[26]
            self.fs_name = split_data[31]
            self.source_path = split_data[35]
            # Creation time spans two tokens: date and time-with-offset.
            self.creation_time = split_data[39] + " " + split_data[40]
            self.destination_path = split_data[44]
            self.ensure_destination_path_exists = split_data[50]
            # Job state is either the 2-token "Job failed" or a 3-token
            # phrase such as "Job was completed" / "Job is running".
            if split_data[55] == 'failed':
                self.job_state = " ".join(split_data[54:56])
            else:
                self.job_state = " ".join(split_data[54:57])
            # The remainder of the output varies with the job, so locate
            # each field by its keyword instead of a fixed position.
            for i in range(55, len(split_data)):
                if split_data[i] == "Started":
                    self.job_started = " ".join(split_data[i + 2:i + 4])
                elif split_data[i] == "Ended":
                    self.job_ended = " ".join(split_data[i + 2:i + 4])
                elif split_data[i] == "Status":
                    self.job_status = split_data[i + 2]
                elif " ".join(split_data[i:i + 2]) == "Error details":
                    # NOTE(review): captures only the single token after
                    # "details" unless it is ":" — multi-word error
                    # messages are not kept; confirm against real output.
                    self.error_details = \
                        split_data[i + 2] if split_data[i + 2] != ":" else ""
                elif " ".join(split_data[i:i + 2]) == "Directories processed":
                    self.directories_processed = split_data[i + 3]
                elif " ".join(split_data[i:i + 2]) == "Files processed":
                    self.files_processed = split_data[i + 3]
                elif " ".join(split_data[i:i + 3]) == "Data bytes processed":
                    self.data_bytes_processed = split_data[i + 4]
                elif " ".join(split_data[i:i + 3]) == "Source directories " \
                                                      "missing":
                    self.directories_missing = split_data[i + 4]
                elif " ".join(split_data[i:i + 3]) == "Source files missing":
                    self.files_missing = split_data[i + 4]
                elif " ".join(split_data[i:i + 3]) == "Source files skipped":
                    self.files_skipped = split_data[i + 4]
                elif split_data[i] == "symlinks":
                    # Skipping-details counts precede their labels,
                    # e.g. "104 symlinks".
                    self.symlinks_skipped = split_data[i - 1]
                elif " ".join(split_data[i:i + 2]) == "hard links":
                    self.hard_links_skipped = split_data[i - 1]
                elif " ".join(split_data[i:i + 3]) == "block special devices":
                    self.block_special_devices_skipped = split_data[i - 1]
                elif " ".join(split_data[i:i + 2]) == "character devices":
                    self.character_devices_skipped = split_data[i - 1]
class JobSubmit(object):
    """Parses the output of an HNAS 'tree-clone-job-submit' command."""

    def __init__(self, data):
        """Extract the request status sentence and the job id.

        :param data: raw text returned by 'tree-clone-job-submit'.  When
            empty or None, no attributes are set.
        """
        if data:
            # Periods are stripped so the status sentence compares
            # cleanly against 'Request submitted successfully'.
            tokens = data.replace(".", "").split()
            # Tokens 1-3 form the status sentence; token 8 is the job
            # UUID following "Job id =".
            self.request_status = " ".join(tokens[1:4])
            self.job_id = tokens[8]

View File

@ -13,6 +13,8 @@
# License for the specific language governing permissions and limitations
# under the License.
import time
import mock
from oslo_concurrency import processutils as putils
from oslo_config import cfg
@ -293,6 +295,97 @@ vol3
usage bytes : 5 GB (5368709120 B) files: 2
last modified: 2015-07-28 20:23:05.672404600+00:00"""
HNAS_RESULT_tree_job_status_fail = """
tree-clone-job-status: Job id = d933100a-b5f6-11d0-91d9-836896aada5d
JOB ID : d933100a-b5f6-11d0-91d9-836896aada5d
Job request
Physical node : 1
EVS : 1
Volume number : 1
File system id : 2ea361c20ed0f80d0000000000000000
File system name : fs1
Source path : "/foo"
Creation time : 2013-09-05 23:16:48-07:00
Destination path : "/clone/bar"
Ensure destination path exists : true
Job state : Job failed
Job info
Started : 2013-09-05 23:16:48-07:00
Ended : 2013-09-05 23:17:02-07:00
Status : Success
Error details :
Directories processed : 220
Files processed : 910
Data bytes processed : 34.5 MB (36174754 B)
Source directories missing : 0
Source files missing : 0
Source files skipped : 801
Skipping details : 104 symlinks, 452 hard links,
47 block special devices, 25 character devices"""
HNAS_RESULT_job = """tree-operation-job-submit: Request submitted successfully.
tree-operation-job-submit: Job id = d933100a-b5f6-11d0-91d9-836896aada5d """
HNAS_RESULT_job_completed = """
tree-clone-job-status: Job id = ab4211b8-aac8-11ce-91af-39e0822ea368
JOB ID : ab4211b8-aac8-11ce-91af-39e0822ea368
Job request
Physical node : 1
EVS : 1
Volume number : 1
File system id : 2ea361c20ed0f80d0000000000000000
File system name : fs1
Source path : "/foo"
Creation time : 2013-09-05 23:16:48-07:00
Destination path : "/clone/bar"
Ensure destination path exists : true
Job state : Job was completed
Job info
Started : 2013-09-05 23:16:48-07:00
Ended : 2013-09-05 23:17:02-07:00
Status : Success
Error details :
Directories processed : 220
Files processed : 910
Data bytes processed : 34.5 MB (36174754 B)
Source directories missing : 0
Source files missing : 0
Source files skipped : 801
Skipping details : 104 symlinks, 452 hard links, 47 \
block special devices, 25 character devices
"""
HNAS_RESULT_job_running = """
tree-clone-job-status: Job id = ab4211b8-aac8-11ce-91af-39e0822ea368
JOB ID : ab4211b8-aac8-11ce-91af-39e0822ea368
Job request
Physical node : 1
EVS : 1
Volume number : 1
File system id : 2ea361c20ed0f80d0000000000000000
File system name : fs1
Source path : "/foo"
Creation time : 2013-09-05 23:16:48-07:00
Destination path : "/clone/bar"
Ensure destination path exists : true
Job state : Job is running
Job info
Started : 2013-09-05 23:16:48-07:00
Ended : 2013-09-05 23:17:02-07:00
Status : Success
Error details :
Directories processed : 220
Files processed : 910
Data bytes processed : 34.5 MB (36174754 B)
Source directories missing : 0
Source files missing : 0
Source files skipped : 801
Skipping details : 104 symlinks, 452 hard links, 47 \
block special devices, 25 character devices
"""
class HNASSSHTestCase(test.TestCase):
def setUp(self):
@ -308,13 +401,15 @@ class HNASSSHTestCase(test.TestCase):
self.evs_id = 2
self.ssh_private_key = 'private_key'
self.cluster_admin_ip0 = 'fake'
self.job_timeout = 30
self.mock_log = self.mock_object(ssh, 'LOG')
self._driver = ssh.HNASSSHBackend(self.ip, self.user, self.password,
self.ssh_private_key,
self.cluster_admin_ip0, self.evs_id,
self.evs_ip, self.fs_name)
self.evs_ip, self.fs_name,
self.job_timeout)
self.vvol = {
'id': 'vvol_test',
@ -590,7 +685,7 @@ class HNASSSHTestCase(test.TestCase):
def test_ensure_share_umounted_fs(self):
fake_list_command = ['nfs-export', 'list ', '/shares/vvol_test']
# tests when filesystem is umounted
# Tests when filesystem is unmounted
self.mock_object(ssh.HNASSSHBackend, '_execute',
mock.Mock(side_effect=[(HNAS_RESULT_fs, ""),
(HNAS_RESULT_u_fs, ""),
@ -817,17 +912,67 @@ class HNASSSHTestCase(test.TestCase):
fake_create_command = ['tree-clone-job-submit', '-e',
'-f', 'file_system', '/shares/vvol_test',
'/snapshots/vvol_test/snapshot_test']
fake_progress_command = ['tree-clone-job-status',
'd933100a-b5f6-11d0-91d9-836896aada5d']
# Tests when a tree job is successfully submitted
self.mock_object(ssh.HNASSSHBackend, '_execute',
mock.Mock(return_value=(HNAS_RESULT_job, "")))
mock.Mock(side_effect=[
(HNAS_RESULT_export, ""),
(HNAS_RESULT_empty, ""),
(HNAS_RESULT_job, ""),
(HNAS_RESULT_job_completed, ""),
(HNAS_RESULT_empty, "")]))
self._driver.create_snapshot(self.vvol['id'],
self.snapshot['id'])
self.assertTrue(self.mock_log.debug.called)
# Assert that _execute sent the right tree-clone command
ssh.HNASSSHBackend._execute.assert_called_with(fake_create_command)
ssh.HNASSSHBackend._execute.assert_any_call(fake_create_command)
ssh.HNASSSHBackend._execute.assert_any_call(fake_progress_command)
def test_create_snapshot_hnas_timeout(self):
    # Freeze time.time(): the first value seeds the job start time, the
    # later values (200, 300) exceed the 30s deadline set by
    # job_timeout=30 in setUp, so the driver gives up on the stalled job.
    self.mock_object(time, 'time', mock.Mock(side_effect=[1, 1, 200, 300]))
    # Avoid actually sleeping between job-status polls.
    self.mock_object(time, 'sleep')
    # Tests when a running tree job stalls at HNAS.
    # NOTE(review): side-effect ordering presumably mirrors create_snapshot's
    # _execute calls: export list, access-rule update, job submit, three
    # stalled status polls, job abort, access-rule restore — confirm
    # against the driver code.
    self.mock_object(ssh.HNASSSHBackend, '_execute',
                     mock.Mock(side_effect=[
                         (HNAS_RESULT_export, ""),
                         (HNAS_RESULT_empty, ""),
                         (HNAS_RESULT_job, ""),
                         (HNAS_RESULT_job_running, ""),
                         (HNAS_RESULT_job_running, ""),
                         (HNAS_RESULT_job_running, ""),
                         (HNAS_RESULT_empty, ""),
                         (HNAS_RESULT_empty, "")]))
    # A stalled job must surface as a backend exception to the caller.
    self.assertRaises(exception.HNASBackendException,
                      self._driver.create_snapshot,
                      self.vvol['id'], self.snapshot['id'])
def test_create_snapshot_job_fails(self):
    # Tests when the snapshot tree-clone job is reported as failed by HNAS.
    # This is the job-status query command (named fake_status_command to
    # match fake_progress_command in test_create_snapshot — the previous
    # name fake_create_command was misleading).
    fake_status_command = ['tree-clone-job-status',
                           'd933100a-b5f6-11d0-91d9-836896aada5d']
    # NOTE(review): side-effect ordering presumably mirrors create_snapshot's
    # _execute calls: export list, access-rule update, job submit, failed
    # job status, access-rule restore — confirm against the driver code.
    self.mock_object(ssh.HNASSSHBackend, '_execute',
                     mock.Mock(side_effect=[
                         (HNAS_RESULT_export, ""),
                         (HNAS_RESULT_empty, ""),
                         (HNAS_RESULT_job, ""),
                         (HNAS_RESULT_tree_job_status_fail, ""),
                         (HNAS_RESULT_empty, "")]))
    mock_log = self.mock_object(ssh, 'LOG')
    # A failed job must raise and log an error.
    self.assertRaises(exception.HNASBackendException,
                      self._driver.create_snapshot, self.vvol['id'],
                      self.snapshot['id'])
    self.assertTrue(mock_log.error.called)
    # The driver must have polled the job status for the submitted job id.
    ssh.HNASSSHBackend._execute.assert_any_call(fake_status_command)
def test_create_empty_snapshot(self):
fake_create_command = ['selectfs', 'file_system', '\n', 'ssc',
@ -838,16 +983,20 @@ class HNASSSHTestCase(test.TestCase):
msg = 'Cannot find any clonable files in the source directory'
self.mock_object(ssh.HNASSSHBackend, '_execute',
mock.Mock(side_effect=[(putils.ProcessExecutionError
(stderr=msg)),
(HNAS_RESULT_empty, "")]))
mock.Mock(side_effect=[
(HNAS_RESULT_export, ""),
(HNAS_RESULT_empty, ""),
(putils.ProcessExecutionError(stderr=msg)),
(HNAS_RESULT_empty, ""),
(HNAS_RESULT_empty, "")]))
self._driver.create_snapshot(self.vvol['id'], self.snapshot['id'])
self.assertTrue(self.mock_log.warning.called)
# Assert that _execute sent the right command to select fs and create
# a directory.
ssh.HNASSSHBackend._execute.assert_called_with(fake_create_command)
ssh.HNASSSHBackend._execute.assert_any_call(fake_create_command)
def test_create_snapshot_submit_fails(self):
fake_create_command = ['tree-clone-job-submit', '-e', '-f',
@ -857,15 +1006,18 @@ class HNASSSHTestCase(test.TestCase):
msg = 'Cannot create copy from this directory'
self.mock_object(ssh.HNASSSHBackend, '_execute',
mock.Mock(side_effect=putils.ProcessExecutionError
(stderr=msg)))
mock.Mock(side_effect=[
(HNAS_RESULT_export, ""),
(HNAS_RESULT_empty, ""),
putils.ProcessExecutionError(stderr=msg),
(HNAS_RESULT_empty, "")]))
self.assertRaises(exception.HNASBackendException,
self._driver.create_snapshot, self.vvol['id'],
self.snapshot['id'])
self.assertTrue(self.mock_log.exception.called)
ssh.HNASSSHBackend._execute.assert_called_with(fake_create_command)
ssh.HNASSSHBackend._execute.assert_any_call(fake_create_command)
def test_delete_snapshot(self):
fake_delete_command = ['selectfs', 'file_system', '\n', 'ssc',