Avoid reboot loop when patch fails
When a failure occurs during the remote pull of a reboot-required patch to a host, the system enters a reboot loop. This happens because the reboot-required flag was left set even though the installation process failed. This commit fixes it by checking whether the flag still exists after the install made no changes; if it does, the flag is deleted and a proper error is returned. Test-plan: PASS: Fail a patch during deploy host - The system should set the state to host-failed - The system must not enter a reboot loop Story: 2010676 Task: 51192 Change-Id: Ib311f28911620cd14df357e06ff9e5afcf82b745 Signed-off-by: Lindley Vieira <lindley.vieira@windriver.com>
This commit is contained in:
parent
c4f8751daa
commit
bc30e3464e
software/software
@ -288,7 +288,7 @@ def pull_ostree_from_remote(remote=None):
|
||||
ref_cmd = "ostree refs --force --create=%s %s" % (ref, constants.OSTREE_REF)
|
||||
|
||||
try:
|
||||
subprocess.run(cmd % ref, shell=True, check=True, capture_output=True)
|
||||
output = subprocess.run(cmd % ref, shell=True, check=True, capture_output=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
msg = "Failed to pull from %s remote into sysroot ostree" % ref
|
||||
err_msg = "OSTree Pull Error: return code: %s, Output: %s" \
|
||||
@ -296,6 +296,10 @@ def pull_ostree_from_remote(remote=None):
|
||||
LOG.exception(err_msg)
|
||||
raise OSTreeCommandFail(msg)
|
||||
|
||||
# Log to help identify errors
|
||||
msg = "Remote pull output: %s" % output
|
||||
LOG.info(msg)
|
||||
|
||||
if ref_cmd:
|
||||
try:
|
||||
subprocess.run(ref_cmd, shell=True, check=True, capture_output=True)
|
||||
|
@ -510,6 +510,12 @@ class PatchAgent(PatchService):
|
||||
self.listener.bind(('', self.port))
|
||||
self.listener.listen(2) # Allow two connections, for two controllers
|
||||
|
||||
def set_install_failed_flags(self):
|
||||
"""Set flags and states for a failed patch"""
|
||||
self.patch_failed = True
|
||||
setflag(patch_failed_file)
|
||||
self.state = constants.PATCH_AGENT_STATE_INSTALL_FAILED
|
||||
|
||||
def query(self, major_release=None):
|
||||
"""Check current patch state """
|
||||
if not self.install_local and not check_install_uuid():
|
||||
@ -574,10 +580,7 @@ class PatchAgent(PatchService):
|
||||
# controller, we don't want to install patches.
|
||||
if not self.install_local and not check_install_uuid():
|
||||
LOG.error("Failed install_uuid check. Skipping install")
|
||||
|
||||
self.patch_failed = True
|
||||
setflag(patch_failed_file)
|
||||
self.state = constants.PATCH_AGENT_STATE_INSTALL_FAILED
|
||||
self.set_install_failed_flags()
|
||||
|
||||
# Send a hello to provide a state update
|
||||
if self.sock_out is not None:
|
||||
@ -611,9 +614,7 @@ class PatchAgent(PatchService):
|
||||
clearflag(patch_failed_file)
|
||||
self.state = constants.PATCH_AGENT_STATE_IDLE
|
||||
else:
|
||||
self.patch_failed = True
|
||||
setflag(patch_failed_file)
|
||||
self.state = constants.PATCH_AGENT_STATE_INSTALL_FAILED
|
||||
self.set_install_failed_flags()
|
||||
return success
|
||||
|
||||
# prepare major release deployment
|
||||
@ -754,22 +755,23 @@ class PatchAgent(PatchService):
|
||||
except Exception as e:
|
||||
LOG.exception("Failure running hooks: %s" % str(e))
|
||||
setflag(run_hooks_flag)
|
||||
self.patch_failed = True
|
||||
setflag(patch_failed_file)
|
||||
self.state = constants.PATCH_AGENT_STATE_INSTALL_FAILED
|
||||
self.set_install_failed_flags()
|
||||
success = False
|
||||
else:
|
||||
# Update the patch_failed flag
|
||||
self.patch_failed = True
|
||||
setflag(patch_failed_file)
|
||||
self.state = constants.PATCH_AGENT_STATE_INSTALL_FAILED
|
||||
self.set_install_failed_flags()
|
||||
|
||||
clearflag(patch_installing_file)
|
||||
self.query()
|
||||
|
||||
self.query() # Update self.changes
|
||||
if self.changes:
|
||||
LOG.warning("Installing the patch did not change the patch current status")
|
||||
|
||||
if os.path.exists(node_is_software_updated_rr_file):
|
||||
LOG.error("No deployment created and reboot required flag exists")
|
||||
self.set_install_failed_flags()
|
||||
# Clear flag to avoid reboot loop
|
||||
clearflag(node_is_software_updated_rr_file)
|
||||
|
||||
# Send a hello to provide a state update
|
||||
if self.sock_out is not None:
|
||||
hello_ack = PatchMessageHelloAgentAck()
|
||||
|
Loading…
x
Reference in New Issue
Block a user