Convert deployment to use new ssh-admin playbook

This change converts the deployment process to use the new enable-ssh-admin
playbook (cli-enable-ssh-admin.yaml) instead of shell commands and a Mistral
workflow.

Tests have been updated to reflect the new function signatures, which no
longer take the log, client manager, or ssh-enable timeout arguments.

Story: 2007212
Task: 38427
Depends-On: https://review.opendev.org/#/c/706196/
Depends-On: I5ab7f4d9dc39a74714c9bcfa39fa50cc586e8e08
Change-Id: I5c744e116f00e81f92c014fc1f599ad3db11ada2
Signed-off-by: Kevin Carter <kecarter@redhat.com>
This commit is contained in:
Kevin Carter 2020-01-23 13:14:29 -06:00 committed by Alex Schultz
parent 5855d41262
commit 4866ab8fde
11 changed files with 134 additions and 236 deletions

View File

@ -34,6 +34,10 @@ class FakeApp(object):
self.command_options = None
# Test double standing in for a stack object: it exposes only the
# `stack_name` attribute, fixed to 'undercloud'.
class FakeStackObject(object):
stack_name = 'undercloud'
class FakeClientManager(object):
def __init__(self):
self.identity = None

View File

@ -97,10 +97,12 @@ class TestFFWDUpgradePrepare(fakes.TestFFWDUpgradePrepare):
mock_write_overcloudrc.assert_called_once_with("mystack",
mock.ANY)
mock_enable_ssh_admin.assert_called_once_with(
self.cmd.log, self.app.client_manager, mock_stack,
mock_stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
600, 300)
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_port_timeout
)
mock_overcloud_deploy.assert_called_once_with(parsed_args)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'

View File

@ -101,10 +101,12 @@ class TestOvercloudUpgradePrepare(fakes.TestOvercloudUpgradePrepare):
mock.ANY)
mock_overcloud_deploy.assert_called_once_with(parsed_args)
mock_enable_ssh_admin.assert_called_once_with(
self.cmd.log, self.app.client_manager, mock_stack,
mock_stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
10, 10)
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_port_timeout
)
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'take_action')

View File

@ -34,11 +34,9 @@ class TestAdminAuthorize(test_plugin.TestPluginV1):
self.cmd.take_action(parsed_args)
mock_get_host_and_enable_ssh_admin.assert_called_once_with(
self.cmd.log,
self.app.client_manager,
mock_stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
600,
300)
parsed_args.overcloud_ssh_port_timeout
)

View File

@ -17,7 +17,7 @@ import mock
from osc_lib.tests import utils
from tripleoclient import exceptions
from tripleoclient.tests.fakes import FakeFile
from tripleoclient.tests.fakes import FakeStackObject
from tripleoclient.workflows import deployment
@ -46,75 +46,28 @@ class TestDeploymentWorkflows(utils.TestCommand):
"message": "Fail.",
}])
@mock.patch('shutil.rmtree')
@mock.patch('os.chdir')
@mock.patch('tripleoclient.workflows.deployment.wait_for_ssh_port')
@mock.patch('tripleoclient.workflows.deployment.time.sleep')
@mock.patch('tripleoclient.utils.shutil.rmtree')
@mock.patch('tripleoclient.workflows.deployment.open')
@mock.patch('tripleoclient.utils.tempfile')
@mock.patch('tripleoclient.workflows.deployment.subprocess.check_call')
def test_enable_ssh_admin(self, mock_check_call, mock_tempfile, mock_open,
mock_rmtree, mock_sleep, mock_wait_for_ssh_port,
mock_chdir):
log = mock.Mock()
@mock.patch('tripleoclient.utils.run_ansible_playbook',
autospec=True)
def test_enable_ssh_admin(self, mock_rmtree, mock_chdir, mock_tempfile,
mock_playbook):
hosts = 'a', 'b', 'c'
ssh_user = 'test-user'
ssh_key = 'test-key'
timeout = 30
mock_tempfile.mkdtemp.return_value = '/foo'
mock_open.side_effect = [FakeFile('DEVNULL'), FakeFile('pubkey'),
FakeFile('key')]
mock_state = mock.Mock()
mock_state.state = 'SUCCESS'
self.workflow.executions.get.return_value = mock_state
deployment.enable_ssh_admin(log, self.app.client_manager,
'overcloud', hosts, ssh_user, ssh_key)
deployment.enable_ssh_admin(
FakeStackObject,
hosts,
ssh_user,
ssh_key,
timeout
)
# once for ssh-keygen, then twice per host
self.assertEqual(7, mock_check_call.call_count)
# execution ran
self.assertEqual(1, self.workflow.executions.create.call_count)
call_args = self.workflow.executions.create.call_args
self.assertEqual('tripleo.access.v1.enable_ssh_admin', call_args[0][0])
self.assertEqual(('a', 'b', 'c'),
call_args[1]['workflow_input']['ssh_servers'])
self.assertEqual('test-user',
call_args[1]['workflow_input']['ssh_user'])
self.assertEqual('key',
call_args[1]['workflow_input']['ssh_private_key'])
# tmpdir should be cleaned up
self.assertEqual(1, mock_rmtree.call_count)
self.assertEqual('/foo', mock_rmtree.call_args[0][0])
@mock.patch('os.chdir')
@mock.patch('tripleoclient.workflows.deployment.wait_for_ssh_port')
@mock.patch('tripleoclient.workflows.deployment.time.sleep')
@mock.patch('tripleoclient.utils.shutil.rmtree')
@mock.patch('tripleoclient.workflows.deployment.open')
@mock.patch('tripleoclient.utils.tempfile')
@mock.patch('tripleoclient.workflows.deployment.subprocess.check_call')
def test_enable_ssh_admin_error(self, mock_check_call, mock_tempfile,
mock_open, mock_rmtree, mock_sleep,
mock_wait_for_ssh_port, mock_chdir):
log = mock.Mock()
hosts = 'a', 'b', 'c'
ssh_user = 'test-user'
ssh_key = 'test-key'
mock_tempfile.mkdtemp.return_value = '/foo'
mock_open.side_effect = [FakeFile('DEVNULL'), FakeFile('pubkey'),
FakeFile('privkey')]
mock_state = mock.Mock()
mock_state.state = 'ERROR'
mock_state.to_dict.return_value = dict(state_info='an error')
self.workflow.executions.get.return_value = mock_state
self.assertRaises(exceptions.DeploymentError,
deployment.enable_ssh_admin,
log, self.app.client_manager,
'overcloud',
hosts, ssh_user, ssh_key)
self.assertEqual(1, mock_playbook.call_count)
@mock.patch('tripleoclient.utils.get_blacklisted_ip_addresses')
@mock.patch('tripleoclient.utils.get_role_net_ip_map')

View File

@ -208,7 +208,8 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
verbosity=0, quiet=False, extra_vars=None,
plan='overcloud', gathering_policy='smart',
extra_env_variables=None, parallel_run=False,
callback_whitelist=None, ansible_cfg=None):
callback_whitelist=None, ansible_cfg=None,
ansible_timeout=30):
"""Simple wrapper for ansible-playbook.
:param playbook: Playbook filename.
@ -282,6 +283,9 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
:param ansible_cfg: Path to an ansible configuration file. One will be
generated in the artifact path if this option is None.
:type ansible_cfg: String
:param ansible_timeout: Timeout for ansible connections.
:type ansible_timeout: int
"""
def _playbook_check(play):
@ -361,7 +365,7 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None,
env = os.environ.copy()
env['ANSIBLE_DISPLAY_FAILED_STDERR'] = True
env['ANSIBLE_FORKS'] = 36
env['ANSIBLE_TIMEOUT'] = 30
env['ANSIBLE_TIMEOUT'] = ansible_timeout
env['ANSIBLE_GATHER_TIMEOUT'] = 45
env['ANSIBLE_SSH_RETRIES'] = 3
env['ANSIBLE_PIPELINING'] = True

View File

@ -62,13 +62,13 @@ class Authorize(command.Command):
)
parser.add_argument(
'--overcloud-ssh-enable-timeout',
help=_('Timeout for the ssh enable process to finish.'),
help=_('This option no longer has any effect.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_TIMEOUT
)
parser.add_argument(
'--overcloud-ssh-port-timeout',
help=_('Timeout for to wait for the ssh port to become active.'),
help=_('Timeout for the ssh port to become active.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
@ -80,9 +80,9 @@ class Authorize(command.Command):
clients = self.app.client_manager
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack,
stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
parsed_args.overcloud_ssh_port_timeout
)

View File

@ -685,13 +685,13 @@ class DeployOvercloud(command.Command):
)
parser.add_argument(
'--overcloud-ssh-enable-timeout',
help=_('Timeout for the ssh enable process to finish.'),
help=_('This option no longer has any effect.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_TIMEOUT
)
parser.add_argument(
'--overcloud-ssh-port-timeout',
help=_('Timeout for to wait for the ssh port to become active.'),
help=_('Timeout for the ssh port to become active.'),
type=int,
default=constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
)
@ -951,12 +951,12 @@ class DeployOvercloud(command.Command):
try:
if not parsed_args.config_download_only:
deployment.get_hosts_and_enable_ssh_admin(
self.log, self.clients, stack,
stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
parsed_args.overcloud_ssh_port_timeout
)
if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout * 60

View File

@ -92,10 +92,12 @@ class FFWDUpgradePrepare(DeployOvercloud):
# refresh stack info and enable ssh admin for Ansible-via-Mistral
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_port_timeout
)
self.log.info("FFWD Upgrade Prepare on stack {0} complete.".format(
parsed_args.stack))

View File

@ -82,10 +82,12 @@ class UpgradePrepare(DeployOvercloud):
# refresh stack info and enable ssh admin for Ansible-via-Mistral
stack = oooutils.get_stack(clients.orchestration, parsed_args.stack)
deployment.get_hosts_and_enable_ssh_admin(
self.log, clients, stack, parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user, parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_enable_timeout,
parsed_args.overcloud_ssh_port_timeout)
stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
parsed_args.overcloud_ssh_key,
parsed_args.overcloud_ssh_port_timeout
)
self.log.info("Completed Overcloud Upgrade Prepare for stack "
"{0}".format(stack_name))

View File

@ -12,19 +12,14 @@
from __future__ import print_function
import copy
import os
import pprint
import socket
import subprocess
import time
import yaml
from heatclient.common import event_utils
from openstackclient import shell
from tripleoclient.constants import ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
from tripleoclient.constants import ENABLE_SSH_ADMIN_STATUS_INTERVAL
from tripleoclient.constants import ENABLE_SSH_ADMIN_TIMEOUT
from tripleoclient.constants import ANSIBLE_TRIPLEO_PLAYBOOKS
from tripleoclient import exceptions
from tripleoclient import utils
@ -159,156 +154,92 @@ def get_overcloud_hosts(stack, ssh_network):
return ips
def wait_for_ssh_port(host, timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT):
start = int(time.time())
while True:
now = int(time.time())
if (now - start) > timeout:
raise exceptions.DeploymentError(
"Timed out waiting for port 22 from %s" % host)
# first check ipv4 then check ipv6
try:
sock = socket.socket()
sock.connect((host, 22))
sock.close()
return
except socket.error:
try:
sock = socket.socket(socket.AF_INET6)
sock.connect((host, 22))
sock.close()
return
except socket.error:
pass
def get_hosts_and_enable_ssh_admin(stack, overcloud_ssh_network,
overcloud_ssh_user, overcloud_ssh_key,
overcloud_ssh_port_timeout):
"""Enable ssh admin access.
time.sleep(1)
Get a list of hosts from a given stack and enable admin ssh across all of
them.
:param stack: Stack data.
:type stack: Object
:param overcloud_ssh_network: Network id.
:type overcloud_ssh_network: String
:param overcloud_ssh_user: SSH access username.
:type overcloud_ssh_user: String
:param overcloud_ssh_key: SSH access key.
:type overcloud_ssh_key: String
:param overcloud_ssh_port_timeout: Ansible connection timeout
:type overcloud_ssh_port_timeout: Int
"""
def get_hosts_and_enable_ssh_admin(
log, clients, stack, overcloud_ssh_network, overcloud_ssh_user,
overcloud_ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT,
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT):
hosts = get_overcloud_hosts(stack, overcloud_ssh_network)
if [host for host in hosts if host]:
try:
enable_ssh_admin(log, clients, stack.stack_name, hosts,
overcloud_ssh_user, overcloud_ssh_key,
enable_ssh_timeout, enable_ssh_port_timeout)
except subprocess.CalledProcessError as e:
if e.returncode == 255:
log.error("Couldn't not import keys to one of {}. "
"Check if the user/ip are corrects.\n".format(hosts))
else:
log.error("Unknown error. "
"Original message is:\n{} {}".format(hosts, e))
enable_ssh_admin(
stack,
hosts,
overcloud_ssh_user,
overcloud_ssh_key,
overcloud_ssh_port_timeout
)
else:
raise exceptions.DeploymentError("Cannot find any hosts on '{}'"
" in network '{}'"
.format(stack.stack_name,
overcloud_ssh_network))
def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key,
enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT,
enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT
):
print("Enabling ssh admin (tripleo-admin) for hosts:")
print(" ".join(hosts))
print("Using ssh user %s for initial connection." % ssh_user)
print("Using ssh key at %s for initial connection." % ssh_key)
ssh_options = ("-o ConnectionAttempts=6 "
"-o ConnectTimeout=30 "
"-o StrictHostKeyChecking=no "
"-o PasswordAuthentication=no "
"-o UserKnownHostsFile=/dev/null")
with utils.TempDirs() as tmp_key_dir:
tmp_key_private = os.path.join(tmp_key_dir, 'id_rsa')
tmp_key_public = os.path.join(tmp_key_dir, 'id_rsa.pub')
tmp_key_comment = "TripleO split stack short term key"
tmp_key_command = ["ssh-keygen", "-N", "", "-t", "rsa", "-b", "4096",
"-f", tmp_key_private, "-C", tmp_key_comment]
DEVNULL = open(os.devnull, 'w')
try:
subprocess.check_call(tmp_key_command, stdout=DEVNULL,
stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as exc:
log.error("ssh-keygen has failed with return code {0}".
format(exc.returncode))
else:
log.info("ssh-keygen has been run successfully")
DEVNULL.close()
with open(tmp_key_public) as pubkey:
tmp_key_public_contents = pubkey.read()
with open(tmp_key_private) as privkey:
tmp_key_private_contents = privkey.read()
for host in hosts:
wait_for_ssh_port(host, enable_ssh_port_timeout)
copy_tmp_key_command = ["ssh"] + ssh_options.split()
copy_tmp_key_command += \
["-o", "StrictHostKeyChecking=no",
"-i", ssh_key, "-l", ssh_user, host,
"echo -e '\n%s' >> $HOME/.ssh/authorized_keys" %
tmp_key_public_contents]
print("Inserting TripleO short term key for %s" % host)
subprocess.check_call(copy_tmp_key_command,
stderr=subprocess.STDOUT)
print("Starting ssh admin enablement workflow")
workflow_client = clients.workflow_engine
workflow_input = {
"ssh_user": ssh_user,
"ssh_servers": hosts,
"ssh_private_key": tmp_key_private_contents,
"plan_name": plan_name
}
execution = base.start_workflow(
workflow_client,
'tripleo.access.v1.enable_ssh_admin',
workflow_input=workflow_input
raise exceptions.DeploymentError(
'Cannot find any hosts on "{}" in network "{}"'.format(
stack.stack_name,
overcloud_ssh_network
)
)
start = int(time.time())
while True:
now = int(time.time())
if (now - start) > enable_ssh_timeout:
raise exceptions.DeploymentError(
"ssh admin enablement workflow - TIMED OUT.")
time.sleep(1)
execution = workflow_client.executions.get(execution.id)
state = execution.state
def enable_ssh_admin(stack, hosts, ssh_user, ssh_key, timeout):
"""Run enable ssh admin access playbook.
if state == 'RUNNING':
if (now - start) % ENABLE_SSH_ADMIN_STATUS_INTERVAL\
== 0:
print("ssh admin enablement workflow - RUNNING.")
continue
elif state == 'SUCCESS':
print("ssh admin enablement workflow - COMPLETE.")
break
elif state in ('FAILED', 'ERROR'):
error = "ssh admin enablement workflow - FAILED.\n"
error += execution.to_dict()['state_info']
raise exceptions.DeploymentError(error)
:param stack: Stack data.
:type stack: Object
for host in hosts:
rm_tmp_key_command = ["ssh"] + ssh_options.split()
rm_tmp_key_command += \
["-i", ssh_key, "-l", ssh_user, host,
"sed -i -e '/%s/d' $HOME/.ssh/authorized_keys" %
tmp_key_comment]
print("Removing TripleO short term key from %s" % host)
subprocess.check_call(rm_tmp_key_command, stderr=subprocess.STDOUT)
:param hosts: Machines to connect to.
:type hosts: List
:param ssh_user: SSH access username.
:type ssh_user: String
:param ssh_key: SSH access key.
:type ssh_key: String
:param timeout: Ansible connection timeout
:type timeout: int
"""
print(
'Enabling ssh admin (tripleo-admin) for hosts: {}.'
'\nUsing ssh user "{}" for initial connection.'
'\nUsing ssh key at "{}" for initial connection.'
'\n\nStarting ssh admin enablement playbook'.format(
hosts,
ssh_user,
ssh_key
)
)
with utils.TempDirs() as tmp:
utils.run_ansible_playbook(
playbook='cli-enable-ssh-admin.yaml',
inventory=','.join(hosts),
workdir=tmp,
playbook_dir=ANSIBLE_TRIPLEO_PLAYBOOKS,
key=ssh_key,
ssh_user=ssh_user,
extra_vars={
"ssh_user": ssh_user,
"ssh_servers": hosts,
'tripleo_cloud_name': stack.stack_name
},
ansible_timeout=timeout
)
print("Enabling ssh admin - COMPLETE.")