Actively check iSCSI connection after login

This replaces the sleep command at the end of the login_iscsi function
with an active check and a forced lun update. These two things help ensure
the connection sees all luns on the iSCSI device before attempting to write
to it. It also adds a configurable option to allow users to be able to
customize the number of attempts for their environment.

Also add some very basic logging to aid in deploy debugging.

Change-Id: I68c9f9f86f5f113bb111c0f4fd83216ae0659d36
Closes-bug: #1415117
Partial-Bug: #1286374
This commit is contained in:
Chris Krelle 2015-01-26 15:58:05 -08:00
parent cee9594382
commit 46067160e6
6 changed files with 119 additions and 3 deletions

View File

@ -734,6 +734,10 @@
# value) # value)
#dd_block_size=1M #dd_block_size=1M
# Max attempts to verify a iSCSI connection is active.
# (integer value)
#iscsi_verify_attempts=3
[dhcp] [dhcp]

View File

@ -135,6 +135,7 @@ class DiskPartitioner(object):
def commit(self): def commit(self):
"""Write to the disk.""" """Write to the disk."""
LOG.debug("Commiting partitions to disk.")
cmd_args = ['mklabel', self._disk_label] cmd_args = ['mklabel', self._disk_label]
# NOTE(lucasagomes): Lead in with 1MiB to allow room for the # NOTE(lucasagomes): Lead in with 1MiB to allow room for the
# partition table itself. # partition table itself.

View File

@ -526,6 +526,7 @@ def dd(src, dst, *args):
:raises: processutils.ProcessExecutionError if it failed :raises: processutils.ProcessExecutionError if it failed
to run the process. to run the process.
""" """
LOG.debug("Starting dd process.")
execute('dd', 'if=%s' % src, 'of=%s' % dst, *args, execute('dd', 'if=%s' % src, 'of=%s' % dst, *args,
run_as_root=True, check_exit_code=[0]) run_as_root=True, check_exit_code=[0])

View File

@ -645,6 +645,8 @@ class ConductorManager(periodic_task.PeriodicTasks):
else: else:
event = 'deploy' event = 'deploy'
LOG.debug("do_node_deploy Calling event: %(event)s for node: "
"%(node)s", {'event': event, 'node': node.uuid})
try: try:
task.process_event(event, task.process_event(event,
callback=self._spawn_worker, callback=self._spawn_worker,

View File

@ -48,6 +48,9 @@ deploy_opts = [
cfg.StrOpt('dd_block_size', cfg.StrOpt('dd_block_size',
default='1M', default='1M',
help='Block size to use when writing to the nodes disk.'), help='Block size to use when writing to the nodes disk.'),
cfg.IntOpt('iscsi_verify_attempts',
default=3,
help='Max attempts to verify a iSCSI connection is active.'),
] ]
CONF = cfg.CONF CONF = cfg.CONF
@ -83,7 +86,44 @@ def login_iscsi(portal_address, portal_port, target_iqn):
attempts=5, attempts=5,
delay_on_retry=True) delay_on_retry=True)
# Ensure the login complete # Ensure the login complete
time.sleep(3) verify_iscsi_connection(target_iqn)
# force iSCSI initiator to re-read luns
force_iscsi_lun_update(target_iqn)
def verify_iscsi_connection(target_iqn):
    """Poll iscsiadm until the target IQN shows up in its output.

    Runs ``iscsiadm -m node -S`` up to ``CONF.deploy.iscsi_verify_attempts``
    times, pausing one second between attempts, and returns as soon as
    ``target_iqn`` is found in the command's stdout.

    :param target_iqn: IQN of the iSCSI target expected to be active.
    :raises: InstanceDeployFailure if the target is not found after the
             configured number of attempts. When that number is zero or
             negative no check is run at all and the failure is raised
             immediately.
    """
    LOG.debug("Checking for iSCSI target to become active.")

    max_attempts = CONF.deploy.iscsi_verify_attempts
    # Count attempts from 1 so the debug log reads "Attempt 1 out of N"
    # rather than starting at 0.
    for attempt in range(1, max_attempts + 1):
        out, _err = utils.execute('iscsiadm',
                                  '-m', 'node',
                                  '-S',
                                  run_as_root=True,
                                  check_exit_code=[0])
        if target_iqn in out:
            return

        # Only wait (and announce a recheck) when another attempt will
        # actually follow; the final failure is reported below instead.
        if attempt < max_attempts:
            LOG.debug("iSCSI connection not active. Rechecking. Attempt "
                      "%(attempt)d out of %(total)d",
                      {"attempt": attempt, "total": max_attempts})
            time.sleep(1)

    msg = _("Max attempts to verify a iSCSI connection is active reached "
            "and the connection didn't become active.")
    LOG.error(msg)
    raise exception.InstanceDeployFailure(msg)
def force_iscsi_lun_update(target_iqn):
    """Force the iSCSI initiator to re-read the LUNs of a target.

    Invokes ``iscsiadm -m node -T <target_iqn> -R`` as root and accepts
    only exit code 0.

    :param target_iqn: IQN of the iSCSI target whose LUNs are re-read.
    """
    LOG.debug("Re-reading iSCSI luns.")
    # NOTE(review): a non-zero exit presumably surfaces as an exception
    # from utils.execute -- confirm against that helper.
    rescan_args = ('iscsiadm', '-m', 'node', '-T', target_iqn, '-R')
    utils.execute(*rescan_args, run_as_root=True, check_exit_code=[0])
def logout_iscsi(portal_address, portal_port, target_iqn): def logout_iscsi(portal_address, portal_port, target_iqn):
@ -134,24 +174,31 @@ def make_partitions(dev, root_mb, swap_mb, ephemeral_mb,
path as Value for the partitions created by this method. path as Value for the partitions created by this method.
""" """
LOG.debug("Starting to partition the disk device: %(dev)s",
{'dev': dev})
part_template = dev + '-part%d' part_template = dev + '-part%d'
part_dict = {} part_dict = {}
dp = disk_partitioner.DiskPartitioner(dev) dp = disk_partitioner.DiskPartitioner(dev)
if ephemeral_mb: if ephemeral_mb:
LOG.debug("Add epheneral partition to device: %(dev)s",
{'dev': dev})
part_num = dp.add_partition(ephemeral_mb) part_num = dp.add_partition(ephemeral_mb)
part_dict['ephemeral'] = part_template % part_num part_dict['ephemeral'] = part_template % part_num
if swap_mb: if swap_mb:
LOG.debug("Add Swap partition to device: %(dev)s",
{'dev': dev})
part_num = dp.add_partition(swap_mb, fs_type='linux-swap') part_num = dp.add_partition(swap_mb, fs_type='linux-swap')
part_dict['swap'] = part_template % part_num part_dict['swap'] = part_template % part_num
if configdrive_mb: if configdrive_mb:
LOG.debug("Add config drive partition to device: %(dev)s",
{'dev': dev})
part_num = dp.add_partition(configdrive_mb) part_num = dp.add_partition(configdrive_mb)
part_dict['configdrive'] = part_template % part_num part_dict['configdrive'] = part_template % part_num
# NOTE(lucasagomes): Make the root partition the last partition. This # NOTE(lucasagomes): Make the root partition the last partition. This
# enables tools like cloud-init's growroot utility to expand the root # enables tools like cloud-init's growroot utility to expand the root
# partition until the end of the disk. # partition until the end of the disk.
LOG.debug("Add root partition to device: %(dev)s", {'dev': dev})
part_num = dp.add_partition(root_mb) part_num = dp.add_partition(root_mb)
part_dict['root'] = part_template % part_num part_dict['root'] = part_template % part_num
@ -266,6 +313,8 @@ def destroy_disk_metadata(dev, node_uuid):
""" """
# NOTE(NobodyCam): This is needed to work around bug: # NOTE(NobodyCam): This is needed to work around bug:
# https://bugs.launchpad.net/ironic/+bug/1317647 # https://bugs.launchpad.net/ironic/+bug/1317647
LOG.debug("Start destroy disk metadata for node %(node)s.",
{'node': node_uuid})
try: try:
utils.execute('dd', 'if=/dev/zero', 'of=%s' % dev, utils.execute('dd', 'if=/dev/zero', 'of=%s' % dev,
'bs=512', 'count=36', run_as_root=True, 'bs=512', 'count=36', run_as_root=True,
@ -418,6 +467,9 @@ def work_on_disk(dev, root_mb, swap_mb, ephemeral_mb, ephemeral_format,
for part in ('swap', 'ephemeral', 'configdrive'): for part in ('swap', 'ephemeral', 'configdrive'):
part_device = part_dict.get(part) part_device = part_dict.get(part)
LOG.debug("checking for %(part)s device (%(dev)s) on node "
"%(node)s.", {'part': part, 'dev': part_device,
'node': node_uuid})
if part_device and not is_block_device(part_device): if part_device and not is_block_device(part_device):
raise exception.InstanceDeployFailure( raise exception.InstanceDeployFailure(
_("'%(partition)s' device '%(part_device)s' not found") % _("'%(partition)s' device '%(part_device)s' not found") %

View File

@ -442,6 +442,62 @@ class PhysicalWorkTestCase(tests_base.TestCase):
self.assertEqual(root_uuid, returned_root_uuid) self.assertEqual(root_uuid, returned_root_uuid)
mock_unlink.assert_called_once_with('configdrive-path') mock_unlink.assert_called_once_with('configdrive-path')
@mock.patch('time.sleep')
@mock.patch.object(common_utils, 'execute')
def test_verify_iscsi_connection_raises(self, mock_exec, mock_sleep):
    """verify_iscsi_connection fails when the target IQN never appears.

    ``time.sleep`` is patched because the code under test pauses one
    second between failed attempts; without the patch this test would
    really wait that long on every run. ``mock_sleep`` is accepted only
    because the patch decorator injects it.
    """
    iqn = 'iqn.xyz'
    # The mocked iscsiadm output lists a different IQN, so every
    # attempt misses and the retry budget is exhausted.
    mock_exec.return_value = ['iqn.abc', '']
    self.assertRaises(exception.InstanceDeployFailure,
                      utils.verify_iscsi_connection, iqn)
    self.assertTrue(mock_exec.called)
@mock.patch.object(common_utils, 'execute')
def test_verify_iscsi_connection(self, mock_exec):
    """verify_iscsi_connection stops after one check when the IQN is found."""
    target = 'iqn.xyz'
    # The mocked (stdout, stderr) pair already contains the target, so
    # the very first attempt succeeds.
    mock_exec.return_value = [target, '']

    utils.verify_iscsi_connection(target)

    expected_args = ('iscsiadm', '-m', 'node', '-S')
    mock_exec.assert_called_once_with(*expected_args,
                                      run_as_root=True,
                                      check_exit_code=[0])
@mock.patch.object(common_utils, 'execute')
def test_force_iscsi_lun_update(self, mock_exec):
    """force_iscsi_lun_update issues exactly one iscsiadm -R call."""
    target = 'iqn.xyz'

    utils.force_iscsi_lun_update(target)

    expected_args = ('iscsiadm', '-m', 'node', '-T', target, '-R')
    mock_exec.assert_called_once_with(*expected_args,
                                      run_as_root=True,
                                      check_exit_code=[0])
@mock.patch.object(common_utils, 'execute')
@mock.patch.object(utils, 'verify_iscsi_connection')
@mock.patch.object(utils, 'force_iscsi_lun_update')
def test_login_iscsi_calls_verify_and_update(self,
                                             mock_update,
                                             mock_verify,
                                             mock_exec):
    """login_iscsi logs in, then verifies the connection and rescans LUNs.

    Both follow-up helpers are mocked out, so the only ``execute`` call
    seen here is the login itself.
    """
    host = '127.0.0.1'
    tcp_port = 3306
    target = 'iqn.xyz'
    mock_exec.return_value = ['iqn.xyz', '']

    utils.login_iscsi(host, tcp_port, target)

    portal = '%s:%s' % (host, tcp_port)
    mock_exec.assert_called_once_with('iscsiadm',
                                      '-m', 'node',
                                      '-p', portal,
                                      '-T', target,
                                      '--login',
                                      run_as_root=True,
                                      check_exit_code=[0],
                                      attempts=5,
                                      delay_on_retry=True)
    # Each helper must be invoked exactly once with the target IQN.
    mock_verify.assert_called_once_with(target)
    mock_update.assert_called_once_with(target)
def test_always_logout_and_delete_iscsi(self): def test_always_logout_and_delete_iscsi(self):
"""Check if logout_iscsi() and delete_iscsi() are called. """Check if logout_iscsi() and delete_iscsi() are called.