Merge "Improve IPA logging and exception handling"

This commit is contained in:
Jenkins 2015-08-03 16:31:47 +00:00 committed by Gerrit Code Review
commit bde6ed5570
5 changed files with 67 additions and 15 deletions
ironic_python_agent

@ -26,12 +26,12 @@ class ExceptionType(wtypes.UserType):
return value
def tobasetype(self, value):
"""Turn a RESTError into a dict."""
"""Turn an Exception into a dict."""
return {
'type': value.__class__.__name__,
'code': value.status_code,
'message': value.message,
'details': value.details,
'code': getattr(value, 'status_code', 500),
'message': str(value),
'details': getattr(value, 'details', ''),
}
frombasetype = tobasetype

@ -24,6 +24,8 @@ from ironic_python_agent import encoding
from ironic_python_agent import errors
from ironic_python_agent import utils
LOG = log.getLogger()
class AgentCommandStatus(object):
"""Mapping of agent command statuses."""
@ -150,7 +152,8 @@ class AsyncCommandResult(BaseCommandResult):
if isinstance(result, (bytes, six.text_type)):
result = {'result': '{}: {}'.format(self.command_name, result)}
LOG.info('Command: %(name)s, result: %(result)s',
{'name': self.command_name, 'result': result})
with self.command_state_lock:
self.command_result = result
self.command_status = AgentCommandStatus.SUCCEEDED
@ -159,7 +162,11 @@ class AsyncCommandResult(BaseCommandResult):
self.command_error = e
self.command_status = AgentCommandStatus.CLEAN_VERSION_MISMATCH
self.command_result = None
LOG.info('Clean version mismatch for command %s',
self.command_name)
except Exception as e:
LOG.exception('Command failed: %(name)s, error: %(err)s',
{'name': self.command_name, 'err': e})
if not isinstance(e, errors.RESTError):
e = errors.CommandExecutionError(str(e))
@ -220,11 +227,16 @@ class ExecuteCommandMixin(object):
def execute_command(self, command_name, **kwargs):
"""Execute an agent command."""
with self.command_lock:
LOG.info('Executing command: %(name)s with args: %(args)s',
{'name': command_name, 'args': kwargs})
extension_part, command_part = self.split_command(command_name)
if len(self.command_results) > 0:
last_command = list(self.command_results.values())[-1]
if not last_command.is_done():
LOG.info('Tried to execute %(command)s, agent is still '
'executing %(last)s', {'command': command_name,
'last': last_command})
raise errors.CommandExecutionError('agent is busy')
try:
@ -232,17 +244,21 @@ class ExecuteCommandMixin(object):
result = ext.execute(command_part, **kwargs)
except KeyError:
# Extension Not found
LOG.exception('Extension %s not found', extension_part)
raise errors.RequestedObjectNotFoundError('Extension',
extension_part)
except errors.InvalidContentError as e:
# Any command may raise a InvalidContentError which will be
# returned to the caller directly.
LOG.exception('Invalid content error: %s', e)
raise e
except Exception as e:
# Other errors are considered command execution errors, and are
# recorded as an
# recorded as a failed SyncCommandResult with an error message
LOG.exception('Command execution error: %s', e)
result = SyncCommandResult(command_name, kwargs, False, e)
LOG.info('Command %(name)s completed: %(result)s',
{'name': command_name, 'result': result})
self.command_results[result.id] = result
return result

@ -12,10 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_log import log
from ironic_python_agent import errors
from ironic_python_agent.extensions import base
from ironic_python_agent import hardware
LOG = log.getLogger()
class CleanExtension(base.BaseAgentExtension):
@base.sync_command('get_clean_steps')
@ -28,10 +32,13 @@ class CleanExtension(base.BaseAgentExtension):
:returns: A list of clean steps with keys step, priority, and
reboot_requested
"""
LOG.debug('Getting clean steps, called with node: %(node)s, '
'ports: %(ports)s', {'node': node, 'ports': ports})
# Results should be a dict, not a list
steps = hardware.dispatch_to_all_managers('get_clean_steps',
node, ports)
LOG.debug('Returning clean steps: %s', steps)
return {
'clean_steps': steps,
'hardware_manager_version': _get_current_clean_version()
@ -52,17 +59,23 @@ class CleanExtension(base.BaseAgentExtension):
the step returns.
"""
# Ensure the agent is still the same version, or raise an exception
LOG.info('Executing clean step %s', step)
_check_clean_version(clean_version)
if 'step' not in step:
raise ValueError('Malformed clean_step, no "step" key: %s'.format(
step))
msg = 'Malformed clean_step, no "step" key: %s' % step
LOG.error(msg)
raise ValueError(msg)
try:
result = hardware.dispatch_to_managers(step['step'], node, ports)
except Exception as e:
raise errors.CleaningError(
'Error performing clean_step %(step)s: %(err)s' %
{'step': step['step'], 'err': e})
msg = ('Error performing clean_step %(step)s: %(err)s' %
{'step': step['step'], 'err': e})
LOG.exception(msg)
raise errors.CleaningError(msg)
LOG.info('Clean step completed: %(step)s, result: %(result)s',
{'step': step, 'result': result})
# Return the step that was executed so we can dispatch
# to the appropriate Ironic interface
return {
@ -78,6 +91,9 @@ def _check_clean_version(clean_version=None):
return
agent_version = _get_current_clean_version()
if clean_version != agent_version:
LOG.warning('Mismatched clean versions. Agent version: %(agent), '
'node version: %(node)', {'agent': agent_version,
'node': clean_version})
raise errors.CleanVersionMismatch(agent_version=agent_version,
node_version=clean_version)

@ -207,26 +207,34 @@ class StandbyExtension(base.BaseAgentExtension):
@base.async_command('cache_image', _validate_image_info)
def cache_image(self, image_info=None, force=False):
LOG.debug('Caching image %s', image_info['id'])
device = hardware.dispatch_to_managers('get_os_install_device')
result_msg = 'image ({0}) already present on device {1}'
if self.cached_image_id != image_info['id'] or force:
LOG.debug('Already had %s cached, overwriting',
self.cached_image_id)
_download_image(image_info)
_write_image(image_info, device)
self.cached_image_id = image_info['id']
result_msg = 'image ({0}) cached to device {1}'
return result_msg.format(image_info['id'], device)
msg = result_msg.format(image_info['id'], device)
LOG.info(msg)
return msg
@base.async_command('prepare_image', _validate_image_info)
def prepare_image(self,
image_info=None,
configdrive=None):
LOG.debug('Preparing image %s', image_info['id'])
device = hardware.dispatch_to_managers('get_os_install_device')
# don't write image again if already cached
if self.cached_image_id != image_info['id']:
LOG.debug('Already had %s cached, overwriting',
self.cached_image_id)
_download_image(image_info)
_write_image(image_info, device)
self.cached_image_id = image_info['id']
@ -234,8 +242,10 @@ class StandbyExtension(base.BaseAgentExtension):
if configdrive is not None:
_write_configdrive_to_partition(configdrive, device)
return 'image ({0}) written to device {1}'.format(image_info['id'],
device)
msg = ('image ({0}) written to device {1}'.format(
image_info['id'], device))
LOG.info(msg)
return msg
def _run_shutdown_script(self, parameter):
script = _path_to_script('shell/shutdown.sh')

@ -615,6 +615,11 @@ def dispatch_to_all_managers(method, *args, **kwargs):
LOG.debug('HardwareManager {0} does not support {1}'
.format(manager, method))
continue
except Exception as e:
LOG.exception('Unexpected error dispatching %(method)s to '
'manager %(manager)s: %(e)s',
{'method': method, 'manager': manager, 'e': e})
raise
responses[manager.__class__.__name__] = response
else:
LOG.debug('HardwareManager {0} does not have method {1}'
@ -651,6 +656,11 @@ def dispatch_to_managers(method, *args, **kwargs):
except(errors.IncompatibleHardwareMethodError):
LOG.debug('HardwareManager {0} does not support {1}'
.format(manager, method))
except Exception as e:
LOG.exception('Unexpected error dispatching %(method)s to '
'manager %(manager)s: %(e)s',
{'method': method, 'manager': manager, 'e': e})
raise
else:
LOG.debug('HardwareManager {0} does not have method {1}'
.format(manager, method))