bc82cc3e90
It seems newer focal images force you to use the ubuntu user account. We already have code that attempts to fallback to ubuntu, but we were not properly catching the error when root fails which caused the whole script to fail. Address this by catching the exception, logging a message, then continuing to the next possible option. If no possible options are found we raise an exception already which handles the worst case situation. Change-Id: Ie6013763daff01063840abce193050b33120a7a2
425 lines
16 KiB
Python
Executable File
425 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Launch a new OpenStack project infrastructure node.
|
|
|
|
# Copyright (C) 2011-2012 OpenStack LLC.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
#
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import argparse
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import threading
|
|
import tempfile
|
|
import time
|
|
import traceback
|
|
|
|
import dns
|
|
import utils
|
|
|
|
import openstack
|
|
import paramiko
|
|
|
|
from sshclient import SSHException
|
|
|
|
SCRIPT_DIR = os.path.dirname(sys.argv[0])
|
|
|
|
try:
|
|
# This unactionable warning does not need to be printed over and over.
|
|
import requests.packages.urllib3
|
|
requests.packages.urllib3.disable_warnings()
|
|
except:
|
|
pass
|
|
|
|
|
|
class JobDir(object):
|
|
def __init__(self, keep=False):
|
|
self.keep = keep
|
|
self.root = tempfile.mkdtemp()
|
|
self.inventory_root = os.path.join(self.root, 'inventory')
|
|
os.makedirs(self.inventory_root)
|
|
self.hosts = os.path.join(self.inventory_root, 'hosts')
|
|
self.groups = os.path.join(self.inventory_root, 'groups')
|
|
self.key = os.path.join(self.root, 'id_rsa')
|
|
self.ansible_log = os.path.join(self.root, 'ansible_log.txt')
|
|
# XXX if we need more, we might like to setup an ansible.cfg
|
|
# file and use that rather than env vars. See
|
|
# zuul/launcher/ansiblelaunchserver.py as an example
|
|
self.env = os.environ.copy()
|
|
self.env['ANSIBLE_LOG_PATH'] = self.ansible_log
|
|
|
|
def __enter__(self):
|
|
return self
|
|
|
|
def __exit__(self, etype, value, tb):
|
|
if not self.keep:
|
|
shutil.rmtree(self.root)
|
|
|
|
|
|
def run(cmd, **args):
|
|
args['stdout'] = subprocess.PIPE
|
|
args['stderr'] = subprocess.STDOUT
|
|
print("Running: %s" % (cmd,))
|
|
proc = subprocess.Popen(cmd, **args)
|
|
out = ''
|
|
for line in iter(proc.stdout.readline, b''):
|
|
line = line.decode('utf-8')
|
|
sys.stdout.write(line)
|
|
sys.stdout.flush()
|
|
out += line
|
|
ret = proc.wait()
|
|
print("Return code: %s" % (ret,))
|
|
if ret != 0:
|
|
raise subprocess.CalledProcessError(ret, cmd, out)
|
|
return ret
|
|
|
|
|
|
def stream_syslog(ssh_client):
|
|
try:
|
|
ssh_client.ssh('tail -f /var/log/syslog')
|
|
except Exception:
|
|
print("Syslog stream terminated")
|
|
|
|
|
|
def bootstrap_server(server, key, name, volume_device, keep,
|
|
mount_path, fs_label, environment, timeout, ignore_ipv6):
|
|
|
|
ip = server.public_v4
|
|
ssh_kwargs = dict(pkey=key)
|
|
|
|
print("--- Running initial configuration on host %s ---" % ip)
|
|
ssh_client = None
|
|
for username in ['root', 'ubuntu', 'centos', 'admin']:
|
|
try:
|
|
ssh_client = utils.ssh_connect(ip, username, ssh_kwargs,
|
|
timeout=timeout)
|
|
except SSHException:
|
|
print("Username: " + username + " failed to ssh. "
|
|
"Trying next option.")
|
|
if ssh_client:
|
|
break
|
|
|
|
if not ssh_client:
|
|
raise Exception("Unable to log in via SSH")
|
|
|
|
# cloud-init puts the "please log in as user foo" message and
|
|
# subsequent exit() in root's authorized_keys -- overwrite it with
|
|
# a normal version to get root login working again.
|
|
if username != 'root':
|
|
ssh_client.ssh("sudo cp ~/.ssh/authorized_keys"
|
|
" ~root/.ssh/authorized_keys")
|
|
ssh_client.ssh("sudo chmod 644 ~root/.ssh/authorized_keys")
|
|
ssh_client.ssh("sudo chown root.root ~root/.ssh/authorized_keys")
|
|
|
|
ssh_client = utils.ssh_connect(ip, 'root', ssh_kwargs, timeout=timeout)
|
|
|
|
if not ignore_ipv6:
|
|
# Something up with RAX images that they have the ipv6 interface in
|
|
# /etc/network/interfaces but eth0 hasn't noticed yet; reload it
|
|
ssh_client.ssh('(ifdown eth0 && ifup eth0) || true')
|
|
|
|
if server.public_v6:
|
|
# The server may be waiting on Router Advertisements to configure
|
|
# this address. Wait for that to complete before pinging.
|
|
ssh_client.ssh("bash -c 'count=0 ; "
|
|
"while ! ip addr | grep -q %s && [\"$count\" -le 60 ] ; do "
|
|
" count=$((count + 1)) ; "
|
|
" echo \"Waiting for IPv6 address to configure\" ; "
|
|
" sleep 1 ; "
|
|
"done'" % server.public_v6)
|
|
ssh_client.ssh('ping6 -c5 -Q 0x10 review.openstack.org '
|
|
'|| ping6 -c5 -Q 0x10 wiki.openstack.org')
|
|
|
|
ssh_client.scp(os.path.join(SCRIPT_DIR, 'make_swap.sh'),
|
|
'make_swap.sh')
|
|
ssh_client.ssh('bash -x make_swap.sh')
|
|
|
|
if volume_device:
|
|
ssh_client.scp(os.path.join(SCRIPT_DIR, 'mount_volume.sh'),
|
|
'mount_volume.sh')
|
|
ssh_client.ssh('bash -x mount_volume.sh %s %s %s' %
|
|
(volume_device, mount_path, fs_label))
|
|
|
|
# Zero the ansible inventory cache so that next run finds the new server
|
|
inventory_cache_dir = '/var/cache/ansible/inventory'
|
|
for inventory_cache in os.listdir(inventory_cache_dir):
|
|
os.unlink(os.path.join(inventory_cache_dir, inventory_cache))
|
|
|
|
with JobDir(keep) as jobdir:
|
|
# Update the generated-groups file globally and incorporate it
|
|
# into our inventory
|
|
# Remove cloud and region from the environment to work
|
|
# around a bug in occ
|
|
expand_env = os.environ.copy()
|
|
for env_key in list(expand_env.keys()):
|
|
if env_key.startswith('OS_'):
|
|
expand_env.pop(env_key, None)
|
|
expand_env['ANSIBLE_LOG_PATH'] = jobdir.ansible_log
|
|
|
|
# Write out the private SSH key we generated
|
|
with open(jobdir.key, 'w') as key_file:
|
|
key.write_private_key(key_file)
|
|
os.chmod(jobdir.key, 0o600)
|
|
|
|
if ignore_ipv6:
|
|
host_ip = server.public_v4
|
|
else:
|
|
host_ip = server.interface_ip
|
|
|
|
# Write out inventory
|
|
with open(jobdir.hosts, 'w') as inventory_file:
|
|
inventory_file.write(
|
|
"{host} ansible_host={ip} ansible_user=root {python}".format(
|
|
host=name, ip=host_ip,
|
|
python='ansible_python_interpreter=/usr/bin/python3'))
|
|
|
|
t = threading.Thread(target=stream_syslog, args=(ssh_client,))
|
|
t.daemon = True
|
|
t.start()
|
|
|
|
inventory_list = (
|
|
'/home/zuul/src/opendev.org/opendev/system-config/inventory/base/hosts.yaml',
|
|
'/home/zuul/src/opendev.org/opendev/system-config/inventory/service/groups.yaml',
|
|
'/etc/ansible/hosts/emergency.yaml',
|
|
jobdir.inventory_root,
|
|
)
|
|
inventory_cmds = [v for e in inventory_list for v in ('-i', e)]
|
|
ansible_cmd = ['ansible-playbook', '--flush-cache' ] + \
|
|
inventory_cmds + \
|
|
['-l', name,
|
|
'--private-key={key}'.format(key=jobdir.key),
|
|
"--ssh-common-args='-o StrictHostKeyChecking=no'",
|
|
'-e', 'target={name}'.format(name=name)]
|
|
|
|
# Run the base playbook limited to just this server we just created
|
|
for playbook in [
|
|
'set-hostnames.yaml',
|
|
'base.yaml',
|
|
'apply-package-updates.yaml',
|
|
]:
|
|
run(ansible_cmd + [
|
|
os.path.join(SCRIPT_DIR, '..', 'playbooks', playbook)],
|
|
env=jobdir.env)
|
|
|
|
try:
|
|
ssh_client.ssh("reboot")
|
|
except Exception as e:
|
|
# Some init system kill the connection too fast after reboot.
|
|
# Deal with it by ignoring ssh errors when rebooting.
|
|
if e.rc == -1:
|
|
pass
|
|
else:
|
|
raise
|
|
|
|
|
|
def build_server(cloud, name, image, flavor,
|
|
volume, keep, network, boot_from_volume, config_drive,
|
|
mount_path, fs_label, availability_zone, environment,
|
|
volume_size, timeout, ignore_ipv6):
|
|
key = None
|
|
server = None
|
|
|
|
create_kwargs = dict(image=image, flavor=flavor, name=name,
|
|
reuse_ips=False, wait=True,
|
|
boot_from_volume=boot_from_volume,
|
|
volume_size=volume_size,
|
|
network=network,
|
|
config_drive=config_drive,
|
|
timeout=timeout)
|
|
|
|
if availability_zone:
|
|
create_kwargs['availability_zone'] = availability_zone
|
|
|
|
if volume:
|
|
create_kwargs['volumes'] = [volume]
|
|
|
|
key_name = 'launch-%i' % (time.time())
|
|
key = paramiko.RSAKey.generate(2048)
|
|
public_key = key.get_name() + ' ' + key.get_base64()
|
|
cloud.create_keypair(key_name, public_key)
|
|
create_kwargs['key_name'] = key_name
|
|
|
|
try:
|
|
server = cloud.create_server(**create_kwargs)
|
|
except Exception:
|
|
try:
|
|
cloud.delete_keypair(key_name)
|
|
except Exception:
|
|
print("Exception encountered deleting keypair:")
|
|
traceback.print_exc()
|
|
raise
|
|
|
|
try:
|
|
cloud.delete_keypair(key_name)
|
|
|
|
server = cloud.get_openstack_vars(server)
|
|
|
|
if volume:
|
|
volume = cloud.get_volume(volume)
|
|
volume_device = cloud.get_volume_attach_device(volume,
|
|
server['id'])
|
|
else:
|
|
volume_device = None
|
|
bootstrap_server(server, key, name, volume_device, keep,
|
|
mount_path, fs_label, environment, timeout, ignore_ipv6)
|
|
print('UUID=%s\nIPV4=%s\nIPV6=%s\n' % (
|
|
server.id, server.public_v4, server.public_v6))
|
|
except Exception:
|
|
print("****")
|
|
print("Server %s failed to build!" % (server.id))
|
|
try:
|
|
volumes = []
|
|
if boot_from_volume:
|
|
volumes = cloud.get_volumes(server)
|
|
if keep:
|
|
print("Keeping as requested")
|
|
# Write out the private SSH key we generated, as we
|
|
# may not have got far enough for ansible to run
|
|
with open('/tmp/%s.id_rsa' % server.id, 'w') as key_file:
|
|
key.write_private_key(key_file)
|
|
os.chmod(key_file.name, 0o600)
|
|
print("Private key saved in %s" % key_file.name)
|
|
print("Run to delete:")
|
|
print(" openstack server delete %s" % (server.id))
|
|
for attachment in volumes:
|
|
print(" openstack volume delete %s" % (attachment.id))
|
|
else:
|
|
cloud.delete_server(server.id, delete_ips=True)
|
|
for attachment in volumes:
|
|
cloud.delete_volume(attachment.id)
|
|
except Exception:
|
|
print("Exception encountered deleting server:")
|
|
traceback.print_exc()
|
|
print("The original exception follows:")
|
|
print("****")
|
|
# Raise the important exception that started this
|
|
raise
|
|
|
|
return server
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("name", help="server name")
|
|
parser.add_argument("--cloud", dest="cloud", required=True,
|
|
help="cloud name")
|
|
parser.add_argument("--region", dest="region",
|
|
help="cloud region")
|
|
parser.add_argument("--flavor", dest="flavor", default='1GB',
|
|
help="name (or substring) of flavor")
|
|
parser.add_argument("--image", dest="image",
|
|
default="Ubuntu 18.04 LTS (Bionic Beaver) (Cloud)",
|
|
help="image name")
|
|
parser.add_argument("--environment", dest="environment",
|
|
help="Puppet environment to use",
|
|
default=None)
|
|
parser.add_argument("--volume", dest="volume",
|
|
help="UUID of volume to attach to the new server.",
|
|
default=None)
|
|
parser.add_argument("--mount-path", dest="mount_path",
|
|
help="Path to mount cinder volume at.",
|
|
default=None)
|
|
parser.add_argument("--fs-label", dest="fs_label",
|
|
help="FS label to use when mounting cinder volume.",
|
|
default=None)
|
|
parser.add_argument("--boot-from-volume", dest="boot_from_volume",
|
|
help="Create a boot volume for the server and use it.",
|
|
action='store_true',
|
|
default=False)
|
|
parser.add_argument("--volume-size", dest="volume_size",
|
|
help="Size of volume (GB) for --boot-from-volume",
|
|
default="50")
|
|
parser.add_argument("--keep", dest="keep",
|
|
help="Don't clean up or delete the server on error.",
|
|
action='store_true',
|
|
default=False)
|
|
parser.add_argument("--verbose", dest="verbose", default=False,
|
|
action='store_true',
|
|
help="Be verbose about logging cloud actions")
|
|
parser.add_argument("--network", dest="network", default=None,
|
|
help="network label to attach instance to")
|
|
parser.add_argument("--config-drive", dest="config_drive",
|
|
help="Boot with config_drive attached.",
|
|
action='store_true',
|
|
default=False)
|
|
parser.add_argument("--timeout", dest="timeout",
|
|
help="Increase timeouts (default 600s)",
|
|
type=int, default=600)
|
|
parser.add_argument("--ignore_ipv6", dest="ignore_ipv6",
|
|
help="Ignore IPv6 addresses from API",
|
|
action='store_true', default=False)
|
|
|
|
parser.add_argument("--az", dest="availability_zone", default=None,
|
|
help="AZ to boot in.")
|
|
options = parser.parse_args()
|
|
|
|
openstack.enable_logging(debug=options.verbose)
|
|
|
|
cloud_kwargs = {}
|
|
if options.region:
|
|
cloud_kwargs['region_name'] = options.region
|
|
cloud = openstack.connect(cloud=options.cloud, **cloud_kwargs)
|
|
|
|
flavor = cloud.get_flavor(options.flavor)
|
|
if flavor:
|
|
print("Found flavor", flavor.name)
|
|
else:
|
|
print("Unable to find matching flavor; flavor list:")
|
|
for i in cloud.list_flavors():
|
|
print(i.name)
|
|
sys.exit(1)
|
|
|
|
image = cloud.get_image_exclude(options.image, 'deprecated')
|
|
if image:
|
|
print("Found image", image.name)
|
|
else:
|
|
print("Unable to find matching image; image list:")
|
|
for i in cloud.list_images():
|
|
print(i.name)
|
|
sys.exit(1)
|
|
|
|
# NOTE(ianw): 2019-06-26 for whatever reason OVH assigns an IPv6
|
|
# address that is seen in API queries, but it is not in the host
|
|
# metadata so the interface isn't autoconfigured. Auto skip it to
|
|
# avoid this bombing out.
|
|
if "-ovh" in options.cloud:
|
|
print("Ignoring IPv6 for OVH cloud instances")
|
|
options.ignore_ipv6 = True
|
|
|
|
server = build_server(cloud, options.name, image, flavor,
|
|
options.volume, options.keep,
|
|
options.network, options.boot_from_volume,
|
|
options.config_drive,
|
|
options.mount_path, options.fs_label,
|
|
options.availability_zone,
|
|
options.environment, options.volume_size,
|
|
options.timeout, options.ignore_ipv6)
|
|
dns.print_dns(cloud, server)
|
|
print("If this is a server that is expected to send email (ask, review,")
|
|
print("lists, etc) double check that the server's IPs are not listed on")
|
|
print("the spamhaus pbl.\n")
|
|
print("URLs to check:")
|
|
print("https://www.spamhaus.org/query/ip/%s" % server.public_v4)
|
|
print("https://www.spamhaus.org/query/ip/%s" % server.public_v6)
|
|
print()
|
|
print("When requesting an exception you can use the")
|
|
print("infra-root@openstack.org email address to verify the responsible")
|
|
print("party.")
|
|
|
|
if __name__ == '__main__':
|
|
main()
|