Cleanup on ssh-agent failure

I'm getting an error when executing ssh-agent add. I'm not sure why, and
the logs don't provide any useful information, nor does the application
clean up the ssh-agents when it fails.

It turns out I didn't have permission to read the SSH private key
on the filesystem.

Provide useful logging and cleanup here.

Change-Id: I1788a57f51e3516c91e12d1e0a20a4b842cedb20
Signed-off-by: Jamie Lennox <jamielennox@gmail.com>
changes/44/481344/3
Jamie Lennox 5 years ago
parent 91fad2609f
commit 377177c03c
  1. 25
      zuul/executor/server.py

@ -118,12 +118,12 @@ class SshAgent(object):
env.update(self.env)
key_path = os.path.expanduser(key_path)
self.log.debug('Adding SSH Key {}'.format(key_path))
output = ''
try:
output = subprocess.check_output(['ssh-add', key_path], env=env,
stderr=subprocess.PIPE)
except subprocess.CalledProcessError:
self.log.error('ssh-add failed: {}'.format(output))
subprocess.check_output(['ssh-add', key_path], env=env,
stderr=subprocess.PIPE)
except subprocess.CalledProcessError as e:
self.log.error('ssh-add failed. stdout: %s, stderr: %s',
e.output, e.stderr)
raise
self.log.info('Added SSH Key {}'.format(key_path))
@ -613,7 +613,12 @@ class ExecutorServer(object):
def executeJob(self, job):
self.job_workers[job.unique] = AnsibleJob(self, job)
self.job_workers[job.unique].run()
try:
self.job_workers[job.unique].run()
except Exception:
del self.job_workers[job.unique]
raise
def finishJob(self, unique):
del(self.job_workers[unique])
@ -704,7 +709,13 @@ class AnsibleJob(object):
def run(self):
self.ssh_agent.start()
self.ssh_agent.add(self.private_key_file)
try:
self.ssh_agent.add(self.private_key_file)
except Exception:
self.ssh_agent.stop()
raise
self.running = True
self.thread = threading.Thread(target=self.execute)
self.thread.start()

Loading…
Cancel
Save