Merge "Improve IPA logging and exception handling"
This commit is contained in:
@@ -26,12 +26,12 @@ class ExceptionType(wtypes.UserType):
|
|||||||
return value
|
return value
|
||||||
|
|
||||||
def tobasetype(self, value):
|
def tobasetype(self, value):
|
||||||
"""Turn a RESTError into a dict."""
|
"""Turn an Exception into a dict."""
|
||||||
return {
|
return {
|
||||||
'type': value.__class__.__name__,
|
'type': value.__class__.__name__,
|
||||||
'code': value.status_code,
|
'code': getattr(value, 'status_code', 500),
|
||||||
'message': value.message,
|
'message': str(value),
|
||||||
'details': value.details,
|
'details': getattr(value, 'details', ''),
|
||||||
}
|
}
|
||||||
|
|
||||||
frombasetype = tobasetype
|
frombasetype = tobasetype
|
||||||
|
@@ -24,6 +24,8 @@ from ironic_python_agent import encoding
|
|||||||
from ironic_python_agent import errors
|
from ironic_python_agent import errors
|
||||||
from ironic_python_agent import utils
|
from ironic_python_agent import utils
|
||||||
|
|
||||||
|
LOG = log.getLogger()
|
||||||
|
|
||||||
|
|
||||||
class AgentCommandStatus(object):
|
class AgentCommandStatus(object):
|
||||||
"""Mapping of agent command statuses."""
|
"""Mapping of agent command statuses."""
|
||||||
@@ -150,7 +152,8 @@ class AsyncCommandResult(BaseCommandResult):
|
|||||||
|
|
||||||
if isinstance(result, (bytes, six.text_type)):
|
if isinstance(result, (bytes, six.text_type)):
|
||||||
result = {'result': '{}: {}'.format(self.command_name, result)}
|
result = {'result': '{}: {}'.format(self.command_name, result)}
|
||||||
|
LOG.info('Command: %(name)s, result: %(result)s',
|
||||||
|
{'name': self.command_name, 'result': result})
|
||||||
with self.command_state_lock:
|
with self.command_state_lock:
|
||||||
self.command_result = result
|
self.command_result = result
|
||||||
self.command_status = AgentCommandStatus.SUCCEEDED
|
self.command_status = AgentCommandStatus.SUCCEEDED
|
||||||
@@ -159,7 +162,11 @@ class AsyncCommandResult(BaseCommandResult):
|
|||||||
self.command_error = e
|
self.command_error = e
|
||||||
self.command_status = AgentCommandStatus.CLEAN_VERSION_MISMATCH
|
self.command_status = AgentCommandStatus.CLEAN_VERSION_MISMATCH
|
||||||
self.command_result = None
|
self.command_result = None
|
||||||
|
LOG.info('Clean version mismatch for command %s',
|
||||||
|
self.command_name)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
LOG.exception('Command failed: %(name)s, error: %(err)s',
|
||||||
|
{'name': self.command_name, 'err': e})
|
||||||
if not isinstance(e, errors.RESTError):
|
if not isinstance(e, errors.RESTError):
|
||||||
e = errors.CommandExecutionError(str(e))
|
e = errors.CommandExecutionError(str(e))
|
||||||
|
|
||||||
@@ -220,11 +227,16 @@ class ExecuteCommandMixin(object):
|
|||||||
def execute_command(self, command_name, **kwargs):
|
def execute_command(self, command_name, **kwargs):
|
||||||
"""Execute an agent command."""
|
"""Execute an agent command."""
|
||||||
with self.command_lock:
|
with self.command_lock:
|
||||||
|
LOG.info('Executing command: %(name)s with args: %(args)s',
|
||||||
|
{'name': command_name, 'args': kwargs})
|
||||||
extension_part, command_part = self.split_command(command_name)
|
extension_part, command_part = self.split_command(command_name)
|
||||||
|
|
||||||
if len(self.command_results) > 0:
|
if len(self.command_results) > 0:
|
||||||
last_command = list(self.command_results.values())[-1]
|
last_command = list(self.command_results.values())[-1]
|
||||||
if not last_command.is_done():
|
if not last_command.is_done():
|
||||||
|
LOG.info('Tried to execute %(command)s, agent is still '
|
||||||
|
'executing %(last)s', {'command': command_name,
|
||||||
|
'last': last_command})
|
||||||
raise errors.CommandExecutionError('agent is busy')
|
raise errors.CommandExecutionError('agent is busy')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -232,17 +244,21 @@ class ExecuteCommandMixin(object):
|
|||||||
result = ext.execute(command_part, **kwargs)
|
result = ext.execute(command_part, **kwargs)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
# Extension Not found
|
# Extension Not found
|
||||||
|
LOG.exception('Extension %s not found', extension_part)
|
||||||
raise errors.RequestedObjectNotFoundError('Extension',
|
raise errors.RequestedObjectNotFoundError('Extension',
|
||||||
extension_part)
|
extension_part)
|
||||||
except errors.InvalidContentError as e:
|
except errors.InvalidContentError as e:
|
||||||
# Any command may raise a InvalidContentError which will be
|
# Any command may raise a InvalidContentError which will be
|
||||||
# returned to the caller directly.
|
# returned to the caller directly.
|
||||||
|
LOG.exception('Invalid content error: %s', e)
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Other errors are considered command execution errors, and are
|
# Other errors are considered command execution errors, and are
|
||||||
# recorded as an
|
# recorded as a failed SyncCommandResult with an error message
|
||||||
|
LOG.exception('Command execution error: %s', e)
|
||||||
result = SyncCommandResult(command_name, kwargs, False, e)
|
result = SyncCommandResult(command_name, kwargs, False, e)
|
||||||
|
LOG.info('Command %(name)s completed: %(result)s',
|
||||||
|
{'name': command_name, 'result': result})
|
||||||
self.command_results[result.id] = result
|
self.command_results[result.id] = result
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@@ -12,10 +12,14 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
from oslo_log import log
|
||||||
|
|
||||||
from ironic_python_agent import errors
|
from ironic_python_agent import errors
|
||||||
from ironic_python_agent.extensions import base
|
from ironic_python_agent.extensions import base
|
||||||
from ironic_python_agent import hardware
|
from ironic_python_agent import hardware
|
||||||
|
|
||||||
|
LOG = log.getLogger()
|
||||||
|
|
||||||
|
|
||||||
class CleanExtension(base.BaseAgentExtension):
|
class CleanExtension(base.BaseAgentExtension):
|
||||||
@base.sync_command('get_clean_steps')
|
@base.sync_command('get_clean_steps')
|
||||||
@@ -28,10 +32,13 @@ class CleanExtension(base.BaseAgentExtension):
|
|||||||
:returns: A list of clean steps with keys step, priority, and
|
:returns: A list of clean steps with keys step, priority, and
|
||||||
reboot_requested
|
reboot_requested
|
||||||
"""
|
"""
|
||||||
|
LOG.debug('Getting clean steps, called with node: %(node)s, '
|
||||||
|
'ports: %(ports)s', {'node': node, 'ports': ports})
|
||||||
# Results should be a dict, not a list
|
# Results should be a dict, not a list
|
||||||
steps = hardware.dispatch_to_all_managers('get_clean_steps',
|
steps = hardware.dispatch_to_all_managers('get_clean_steps',
|
||||||
node, ports)
|
node, ports)
|
||||||
|
|
||||||
|
LOG.debug('Returning clean steps: %s', steps)
|
||||||
return {
|
return {
|
||||||
'clean_steps': steps,
|
'clean_steps': steps,
|
||||||
'hardware_manager_version': _get_current_clean_version()
|
'hardware_manager_version': _get_current_clean_version()
|
||||||
@@ -52,17 +59,23 @@ class CleanExtension(base.BaseAgentExtension):
|
|||||||
the step returns.
|
the step returns.
|
||||||
"""
|
"""
|
||||||
# Ensure the agent is still the same version, or raise an exception
|
# Ensure the agent is still the same version, or raise an exception
|
||||||
|
LOG.info('Executing clean step %s', step)
|
||||||
_check_clean_version(clean_version)
|
_check_clean_version(clean_version)
|
||||||
|
|
||||||
if 'step' not in step:
|
if 'step' not in step:
|
||||||
raise ValueError('Malformed clean_step, no "step" key: %s'.format(
|
msg = 'Malformed clean_step, no "step" key: %s' % step
|
||||||
step))
|
LOG.error(msg)
|
||||||
|
raise ValueError(msg)
|
||||||
try:
|
try:
|
||||||
result = hardware.dispatch_to_managers(step['step'], node, ports)
|
result = hardware.dispatch_to_managers(step['step'], node, ports)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise errors.CleaningError(
|
msg = ('Error performing clean_step %(step)s: %(err)s' %
|
||||||
'Error performing clean_step %(step)s: %(err)s' %
|
{'step': step['step'], 'err': e})
|
||||||
{'step': step['step'], 'err': e})
|
LOG.exception(msg)
|
||||||
|
raise errors.CleaningError(msg)
|
||||||
|
|
||||||
|
LOG.info('Clean step completed: %(step)s, result: %(result)s',
|
||||||
|
{'step': step, 'result': result})
|
||||||
# Return the step that was executed so we can dispatch
|
# Return the step that was executed so we can dispatch
|
||||||
# to the appropriate Ironic interface
|
# to the appropriate Ironic interface
|
||||||
return {
|
return {
|
||||||
@@ -78,6 +91,9 @@ def _check_clean_version(clean_version=None):
|
|||||||
return
|
return
|
||||||
agent_version = _get_current_clean_version()
|
agent_version = _get_current_clean_version()
|
||||||
if clean_version != agent_version:
|
if clean_version != agent_version:
|
||||||
|
LOG.warning('Mismatched clean versions. Agent version: %(agent), '
|
||||||
|
'node version: %(node)', {'agent': agent_version,
|
||||||
|
'node': clean_version})
|
||||||
raise errors.CleanVersionMismatch(agent_version=agent_version,
|
raise errors.CleanVersionMismatch(agent_version=agent_version,
|
||||||
node_version=clean_version)
|
node_version=clean_version)
|
||||||
|
|
||||||
|
@@ -207,26 +207,34 @@ class StandbyExtension(base.BaseAgentExtension):
|
|||||||
|
|
||||||
@base.async_command('cache_image', _validate_image_info)
|
@base.async_command('cache_image', _validate_image_info)
|
||||||
def cache_image(self, image_info=None, force=False):
|
def cache_image(self, image_info=None, force=False):
|
||||||
|
LOG.debug('Caching image %s', image_info['id'])
|
||||||
device = hardware.dispatch_to_managers('get_os_install_device')
|
device = hardware.dispatch_to_managers('get_os_install_device')
|
||||||
|
|
||||||
result_msg = 'image ({0}) already present on device {1}'
|
result_msg = 'image ({0}) already present on device {1}'
|
||||||
|
|
||||||
if self.cached_image_id != image_info['id'] or force:
|
if self.cached_image_id != image_info['id'] or force:
|
||||||
|
LOG.debug('Already had %s cached, overwriting',
|
||||||
|
self.cached_image_id)
|
||||||
_download_image(image_info)
|
_download_image(image_info)
|
||||||
_write_image(image_info, device)
|
_write_image(image_info, device)
|
||||||
self.cached_image_id = image_info['id']
|
self.cached_image_id = image_info['id']
|
||||||
result_msg = 'image ({0}) cached to device {1}'
|
result_msg = 'image ({0}) cached to device {1}'
|
||||||
|
|
||||||
return result_msg.format(image_info['id'], device)
|
msg = result_msg.format(image_info['id'], device)
|
||||||
|
LOG.info(msg)
|
||||||
|
return msg
|
||||||
|
|
||||||
@base.async_command('prepare_image', _validate_image_info)
|
@base.async_command('prepare_image', _validate_image_info)
|
||||||
def prepare_image(self,
|
def prepare_image(self,
|
||||||
image_info=None,
|
image_info=None,
|
||||||
configdrive=None):
|
configdrive=None):
|
||||||
|
LOG.debug('Preparing image %s', image_info['id'])
|
||||||
device = hardware.dispatch_to_managers('get_os_install_device')
|
device = hardware.dispatch_to_managers('get_os_install_device')
|
||||||
|
|
||||||
# don't write image again if already cached
|
# don't write image again if already cached
|
||||||
if self.cached_image_id != image_info['id']:
|
if self.cached_image_id != image_info['id']:
|
||||||
|
LOG.debug('Already had %s cached, overwriting',
|
||||||
|
self.cached_image_id)
|
||||||
_download_image(image_info)
|
_download_image(image_info)
|
||||||
_write_image(image_info, device)
|
_write_image(image_info, device)
|
||||||
self.cached_image_id = image_info['id']
|
self.cached_image_id = image_info['id']
|
||||||
@@ -234,8 +242,10 @@ class StandbyExtension(base.BaseAgentExtension):
|
|||||||
if configdrive is not None:
|
if configdrive is not None:
|
||||||
_write_configdrive_to_partition(configdrive, device)
|
_write_configdrive_to_partition(configdrive, device)
|
||||||
|
|
||||||
return 'image ({0}) written to device {1}'.format(image_info['id'],
|
msg = ('image ({0}) written to device {1}'.format(
|
||||||
device)
|
image_info['id'], device))
|
||||||
|
LOG.info(msg)
|
||||||
|
return msg
|
||||||
|
|
||||||
def _run_shutdown_script(self, parameter):
|
def _run_shutdown_script(self, parameter):
|
||||||
script = _path_to_script('shell/shutdown.sh')
|
script = _path_to_script('shell/shutdown.sh')
|
||||||
|
@@ -615,6 +615,11 @@ def dispatch_to_all_managers(method, *args, **kwargs):
|
|||||||
LOG.debug('HardwareManager {0} does not support {1}'
|
LOG.debug('HardwareManager {0} does not support {1}'
|
||||||
.format(manager, method))
|
.format(manager, method))
|
||||||
continue
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
LOG.exception('Unexpected error dispatching %(method)s to '
|
||||||
|
'manager %(manager)s: %(e)s',
|
||||||
|
{'method': method, 'manager': manager, 'e': e})
|
||||||
|
raise
|
||||||
responses[manager.__class__.__name__] = response
|
responses[manager.__class__.__name__] = response
|
||||||
else:
|
else:
|
||||||
LOG.debug('HardwareManager {0} does not have method {1}'
|
LOG.debug('HardwareManager {0} does not have method {1}'
|
||||||
@@ -651,6 +656,11 @@ def dispatch_to_managers(method, *args, **kwargs):
|
|||||||
except(errors.IncompatibleHardwareMethodError):
|
except(errors.IncompatibleHardwareMethodError):
|
||||||
LOG.debug('HardwareManager {0} does not support {1}'
|
LOG.debug('HardwareManager {0} does not support {1}'
|
||||||
.format(manager, method))
|
.format(manager, method))
|
||||||
|
except Exception as e:
|
||||||
|
LOG.exception('Unexpected error dispatching %(method)s to '
|
||||||
|
'manager %(manager)s: %(e)s',
|
||||||
|
{'method': method, 'manager': manager, 'e': e})
|
||||||
|
raise
|
||||||
else:
|
else:
|
||||||
LOG.debug('HardwareManager {0} does not have method {1}'
|
LOG.debug('HardwareManager {0} does not have method {1}'
|
||||||
.format(manager, method))
|
.format(manager, method))
|
||||||
|
Reference in New Issue
Block a user