This enables basic clustering functionality. We add: tools/cluster/cluster/daemon.py: A server that handles validation of cluster passwords. tools/cluster/cluster/client.py: A client for this server. Important Note: This prototype does not support TLS, and the functionality in the client and server is basic. Before we roll clustering out to production, we need to have those two chat over TLS, and be much more careful about verifying credentials. Also included: Various fixes and changes to the init script and config templates to support cluster configuration, and allow for the fact that we may have endpoint references for two network IPs. Updates to snapcraft.yaml, adding the new tooling. A more formalized config infrastructure. It's still a TODO to move the specification out of the implicit definition in the install hook, and into a nice, explicit, well-documented YAML file. Added nesting to the Question classes in the init script, as well as strings pointing at config keys, rather than having the config be implicitly indicated by the Question subclass's name. (This allows us to put together a config spec that doesn't require the person reading the spec to understand what Questions are, and how they are implemented.) Renamed and unified the "unit" and "lint" tox environments, to allow for the multiple Python tools that we want to lint and test. Added hooks in the init script to make it possible to do automated testing, and added an automated test for a cluster. Run with "tox -e cluster". Added cirros image to snap, to work around sporadic issues downloading it from download.cirros.net. Removed ping logic from snap, to work around failures in the gate. Need to add it back in once we fix them. Change-Id: I44ccd16168a7ed41486464df8c9e22a14d71ccfd changes/43/689943/11
@ -0,0 +1,10 @@ | |||
server { | |||
listen 8011; | |||
error_log syslog:server=unix:/dev/log; | |||
access_log syslog:server=unix:/dev/log; | |||
location / { | |||
include {{ snap }}/usr/conf/uwsgi_params; | |||
uwsgi_param SCRIPT_NAME ''; | |||
uwsgi_pass unix://{{ snap_common }}/run/keystone-api.sock; | |||
} | |||
} |
@ -1,13 +1,13 @@ | |||
[keystone_authtoken] | |||
auth_uri = http://{{ extgateway }}:5000 | |||
auth_url = http://{{ extgateway }}:5000 | |||
memcached_servers = {{ extgateway }}:11211 | |||
auth_uri = http://{{ control_ip }}:5000 | |||
auth_url = http://{{ control_ip }}:5000 | |||
memcached_servers = {{ control_ip }}:11211 | |||
auth_type = password | |||
project_domain_name = default | |||
user_domain_name = default | |||
project_name = service | |||
username = glance | |||
password = glance | |||
password = {{ glance_password }} | |||
[paste_deploy] | |||
flavor = keystone |
@ -1,2 +1,2 @@ | |||
[database] | |||
connection = mysql+pymysql://glance:glance@{{ extgateway }}/glance | |||
connection = mysql+pymysql://glance:glance@{{ control_ip }}/glance |
@ -1,2 +1,2 @@ | |||
[database] | |||
connection = mysql+pymysql://keystone:keystone@{{ extgateway }}/keystone | |||
connection = mysql+pymysql://keystone:keystone@{{ control_ip }}/keystone |
@ -0,0 +1,2 @@ | |||
[DEFAULT] | |||
transport_url = rabbit://openstack:rabbitmq@{{ control_ip }} |
@ -1,2 +1,2 @@ | |||
[database] | |||
connection = mysql+pymysql://neutron:neutron@{{ extgateway }}/neutron | |||
connection = mysql+pymysql://neutron:neutron@{{ control_ip }}/neutron |
@ -1,5 +1,5 @@ | |||
[database] | |||
connection = mysql+pymysql://nova:nova@{{ extgateway }}/nova | |||
connection = mysql+pymysql://nova:nova@{{ control_ip }}/nova | |||
[api_database] | |||
connection = mysql+pymysql://nova_api:nova_api@{{ extgateway }}/nova_api | |||
connection = mysql+pymysql://nova_api:nova_api@{{ control_ip }}/nova_api |
@ -1,2 +1,2 @@ | |||
[glance] | |||
api_servers = http://{{ extgateway }}:9292 | |||
api_servers = http://{{ control_ip }}:9292 |
@ -1,13 +1,13 @@ | |||
[keystone_authtoken] | |||
auth_uri = http://{{ extgateway }}:5000 | |||
auth_url = http://{{ extgateway }}:5000 | |||
memcached_servers = {{ extgateway }}:11211 | |||
auth_uri = http://{{ control_ip }}:5000 | |||
auth_url = http://{{ control_ip }}:5000 | |||
memcached_servers = {{ control_ip }}:11211 | |||
auth_type = password | |||
project_domain_name = default | |||
user_domain_name = default | |||
project_name = service | |||
username = nova | |||
password = nova | |||
password = {{ nova_password }} | |||
[paste_deploy] | |||
flavor = keystone |
@ -1,13 +1,13 @@ | |||
[neutron] | |||
url = http://{{ extgateway }}:9696 | |||
auth_url = http://{{ extgateway }}:5000 | |||
memcached_servers = {{ extgateway }}:11211 | |||
url = http://{{ control_ip }}:9696 | |||
auth_url = http://{{ control_ip }}:5000 | |||
memcached_servers = {{ control_ip }}:11211 | |||
auth_type = password | |||
project_domain_name = default | |||
user_domain_name = default | |||
region_name = microstack | |||
project_name = service | |||
username = neutron | |||
password = neutron | |||
password = {{ neutron_password }} | |||
service_metadata_proxy = True | |||
metadata_proxy_shared_secret = supersecret |
@ -1,2 +1,2 @@ | |||
[DEFAULT] | |||
transport_url = rabbit://openstack:rabbitmq@{{ extgateway }} | |||
transport_url = rabbit://openstack:rabbitmq@{{ control_ip }} |
@ -0,0 +1,131 @@ | |||
#!/usr/bin/env python | |||
""" | |||
cluster_test.py | |||
This is a test to verify that we can setup a small, two node cluster. | |||
The host running this test must have at least 16GB of RAM, four cpu | |||
cores, a large amount of disk space, and the ability to run multipass | |||
vms. | |||
""" | |||
import json | |||
import os | |||
import petname | |||
import sys | |||
import unittest | |||
sys.path.append(os.getcwd()) | |||
from tests.framework import Framework, check, check_output, call # noqa E402 | |||
os.environ['MULTIPASS'] = 'true' # TODO better way to do this. | |||
class TestCluster(Framework):
    """Check that a small, two node cluster can be set up.

    The test body assumes the parent ``Framework`` has already brought up
    a control node in a multipass vm that is reachable through
    ``self.PREFIX`` (see the comment at the top of ``test_cluster``).
    """

    INIT_FLAG = 'control'

    def _compute_node(self, channel='dangerous'):
        """Make a compute node.

        Launches a fresh multipass vm, copies the snap onto it and
        installs it.

        TODO: refactor framework so that we can fold a lot of this
        into the parent framework. There's a lot of dupe code here.

        :param channel: install mode, passed to ``snap install`` as
            ``--<channel>``.
        :returns: tuple of (machine name, ``multipass exec`` prefix list
            for running commands on that machine).
        """
        machine = petname.generate()
        prefix = ['multipass', 'exec', machine, '--']

        check('multipass', 'launch', '--cpus', '2', '--mem', '8G',
              self.DISTRO, '--name', machine)
        check('multipass', 'copy-files', self.SNAP, '{}:'.format(machine))
        check(*prefix, 'sudo', 'snap', 'install', '--classic',
              '--{}'.format(channel), self.SNAP)

        return machine, prefix

    def test_cluster(self):
        """Join a compute node to the control node and boot an instance."""
        # After the setUp step, we should have a control node running
        # in a multipass vm. Let's look up its cluster password and ip
        # address.
        openstack = '/snap/bin/microstack.openstack'

        cluster_password = check_output(*self.PREFIX, 'sudo', 'snap',
                                        'get', 'microstack',
                                        'config.cluster.password')
        control_ip = check_output(*self.PREFIX, 'sudo', 'snap',
                                  'get', 'microstack',
                                  'config.network.control-ip')

        self.assertTrue(cluster_password)
        self.assertTrue(control_ip)

        compute_machine, compute_prefix = self._compute_node()

        # TODO add the following to args for init
        check(*compute_prefix, 'sudo', 'snap', 'set', 'microstack',
              'config.network.control-ip={}'.format(control_ip))

        check(*compute_prefix, 'sudo', 'microstack.init', '--compute',
              '--cluster-password', cluster_password, '--debug')

        # Verify that our services look setup properly on compute node.
        services = check_output(
            *compute_prefix, 'systemctl', 'status', 'snap.microstack.*',
            '--no-page')

        # assertIn/assertNotIn report the searched text on failure, which
        # a bare assertTrue/assertFalse on a boolean does not.
        self.assertIn('nova-compute', services)
        self.assertNotIn('keystone-', services)

        check(*compute_prefix, '/snap/bin/microstack.launch', 'cirros',
              '--name', 'breakfast', '--retry',
              '--availability-zone', 'nova:{}'.format(compute_machine))

        # TODO: verify horizon dashboard on control node.

        # Verify endpoints
        compute_ip = check_output(*compute_prefix, 'sudo', 'snap',
                                  'get', 'microstack',
                                  'config.network.compute-ip')
        self.assertNotEqual(compute_ip, control_ip)

        # Ping the instance
        ip = None
        servers = check_output(*compute_prefix, openstack,
                               'server', 'list', '--format', 'json')
        servers = json.loads(servers)
        for server in servers:
            if server['Name'] == 'breakfast':
                # The second comma separated entry of "Networks" is used
                # as the address to ping.
                ip = server['Networks'].split(",")[1].strip()
                break

        self.assertTrue(ip)

        pings = 1
        max_pings = 60  # ~1 minute

        # Ping the machine from the control node (we don't have
        # networking wired up for the other nodes).
        while not call(*self.PREFIX, 'ping', '-c1', '-w1', ip):
            pings += 1
            if pings > max_pings:
                self.fail(
                    'Max pings reached for instance on {}!'.format(
                        compute_machine))

        # NOTE(review): presumably read by the parent framework during
        # teardown -- confirm against tests/framework.py.
        self.passed = True

        # Compute machine cleanup
        check('sudo', 'multipass', 'delete', compute_machine)
if __name__ == '__main__': | |||
# Run our tests, ignoring deprecation warnings and warnings about | |||
# unclosed sockets. (TODO: setup a selenium server so that we can | |||
# move from PhantomJS, which is deprecated, to Selenium headless.) | |||
unittest.main(warnings='ignore') |
@ -0,0 +1,41 @@ | |||
import json | |||
import requests | |||
from cluster.shell import check, check_output, write_tunnel_config | |||
def join():
    """Join an existing cluster as a compute node.

    Reads the cluster password and the control/compute ip addresses from
    the snap config, posts the password and this node's ip to the
    clustering daemon on the control node (port 10002), then writes the
    local tunnel config and stores the returned ``os-password``.

    :raises Exception: if no cluster password is configured, or if the
        control node answers with anything other than HTTP 200.
    """
    config = json.loads(check_output('snapctl', 'get', 'config'))

    password = config['cluster']['password']
    control_ip = config['network']['control-ip']
    my_ip = config['network']['compute-ip']

    if not password:
        raise Exception("No cluster password specified!")

    resp = requests.post(
        'http://{}:10002/join'.format(control_ip),
        json={'password': password, 'ip_address': my_ip})
    if resp.status_code != 200:
        # TODO better error and formatting.
        # Report the raw body: an error reply is not guaranteed to be
        # JSON, and formatting the un-called ``resp.json`` attribute would
        # only print a bound-method repr.
        raise Exception('Failed to get info from control node: {}'.format(
            resp.text))
    resp = resp.json()

    # TODO: add better error handling to the below
    os_password = resp['config']['credentials']['os-password']

    # Write out tunnel config and restart neutron openvswitch agent.
    write_tunnel_config(my_ip)
    check('snapctl', 'restart', 'microstack.neutron-openvswitch-agent')

    # Set passwords and such
    check('snapctl', 'set', 'config.credentials.os-password={}'.format(
        os_password))
if __name__ == '__main__': | |||
join() |
@ -0,0 +1,55 @@ | |||
import json | |||
from flask import Flask, request | |||
from cluster.shell import check, check_output, write_tunnel_config | |||
app = Flask(__name__) | |||
class Unauthorized(Exception):
    """Signals that a join request carried the wrong cluster password."""
def join_info(password, ip_address):
    """Validate a join request and return this node's config.

    On success the local tunnel config is (re)written with the control
    ip and the neutron openvswitch agent is restarted.

    :param password: cluster password supplied by the joining node.
    :param ip_address: ip address reported by the joining node. Currently
        unused.
    :returns: dict of the form ``{'config': <snap config>}``.
    :raises Unauthorized: if ``password`` is not a string, if no cluster
        password is configured locally, or if the two do not match
        (surrounding whitespace is ignored on both sides).
    """
    # Function-scope import so this block stands alone.
    import hmac

    our_password = check_output('snapctl', 'get', 'config.cluster.password')

    # The password arrives from the network; a non-string JSON value must
    # be rejected rather than crash on ``.strip()``.
    if not isinstance(password, str):
        raise Unauthorized()

    supplied = password.strip().encode('utf-8')
    expected = our_password.strip().encode('utf-8')

    # An unset cluster password must never match a whitespace-only one.
    if not expected:
        raise Unauthorized()

    # Constant-time comparison, so response timing does not leak how much
    # of the password was correct.
    if not hmac.compare_digest(supplied, expected):
        raise Unauthorized()

    # Load config
    # TODO: be selective about what we return. For now, we just get everything.
    config = json.loads(check_output('snapctl', 'get', 'config'))

    # Write out tunnel config and restart neutron openvswitch agent.
    write_tunnel_config(config['network']['control-ip'])
    check('snapctl', 'restart', 'microstack.neutron-openvswitch-agent')

    info = {'config': config}
    return info
@app.route('/')
def home():
    """Return a small JSON document saying the daemon is running."""
    return json.dumps({
        'status': 'running',
        'info': 'Microstack clustering daemon.',
    })
@app.route('/join', methods=['POST'])
def join():
    """Handle a join request from a prospective compute node.

    Expects a JSON object with a ``password`` key and an ``ip_address``
    key in the request body.

    :returns: the JSON-encoded result of ``join_info`` on success;
        ``('No password specified', 500)`` when the body carries no usable
        password; a JSON error document with status 500 when the password
        is rejected.
    """
    # TODO: better error messages on failed parse.
    # silent=True gives None for a missing or unparseable JSON body
    # instead of raising; anything that is not a JSON object is then
    # treated as a request without a password.
    req = request.get_json(silent=True)
    if not isinstance(req, dict):
        req = {}

    password = req.get('password')
    ip_address = req.get('ip_address')
    if not password:
        return 'No password specified', 500

    try:
        return json.dumps(join_info(password, ip_address))
    except Unauthorized:
        return (json.dumps({'error': 'Incorrect password.'}), 500)
@ -0,0 +1,50 @@ | |||
import os | |||
import pymysql | |||
import subprocess | |||
def sql(cmd) -> None:
    """Execute some SQL!

    Really simple wrapper around a pymysql connection, suitable for
    passing the limited CREATE and GRANT commands that we need to pass
    here.

    :param cmd: sql to execute.
    :raises KeyError: if ``SNAP_USER_COMMON`` is not set in the environment.

    # TODO: move this into a shared shell library.
    """
    # No leading '$' here: str.format only substitutes the braces, so the
    # old '${SNAP_USER_COMMON}' template produced a path starting with a
    # literal '$' that could never point at the real my.cnf.
    mysql_conf = '{SNAP_USER_COMMON}/etc/mysql/my.cnf'.format(**os.environ)
    connection = pymysql.connect(host='localhost', user='root',
                                 read_default_file=mysql_conf)
    try:
        with connection.cursor() as cursor:
            cursor.execute(cmd)
    finally:
        # Always release the connection, even if the statement fails.
        connection.close()
def check_output(*args):
    """Run a command and return what it printed to stdout.

    The command runs with the current environment; the captured text has
    leading and trailing whitespace removed.

    :param args: the command and its arguments, one string each.
    """
    captured = subprocess.check_output(
        args, env=os.environ, universal_newlines=True)
    return captured.strip()
def check(*args):
    """Run a command with the current environment, failing loudly.

    A non-zero exit status surfaces as the error raised by
    ``subprocess.check_call``.

    :param args: strings to be composed into the bash call.
    """
    exit_status = subprocess.check_call(args, env=os.environ)
    return exit_status
def write_tunnel_config(local_ip):
    """Write the tunnel config file read by the neutron agent.

    The file lands under ``$SNAP_COMMON/etc/neutron/neutron.conf.d`` and
    holds a single ``[OVS]`` section.

    :param local_ip: value stored under ``local_ip`` in that section.
    """
    destination = (
        '{SNAP_COMMON}/etc/neutron/neutron.conf.d/tunnel.conf'.format(
            SNAP_COMMON=os.environ['SNAP_COMMON']))
    contents = '[OVS]\nlocal_ip = {local_ip}\n'.format(local_ip=local_ip)

    with open(destination, 'w') as handle:
        handle.write(contents)
@ -0,0 +1,2 @@ | |||
flask | |||
requests |
@ -0,0 +1,13 @@ | |||
from setuptools import setup, find_packages | |||
# Packaging metadata for the clustering client and server.
setup(
    name="microstack_cluster",
    description="Clustering client and server.",
    # Package everything that find_packages discovers, except "tests".
    packages=find_packages(exclude=("tests",)),
    version="0.0.1",
    entry_points={
        # Installs a ``microstack_join`` command that calls
        # cluster.client.join.
        'console_scripts': [
            'microstack_join = cluster.client:join',
        ],
    }
)
@ -0,0 +1,87 @@ | |||
from getpass import getpass | |||
from init.questions.question import Question, InvalidAnswer | |||
from init.shell import check, check_output, fetch_ip_address | |||
class Role(Question):
    """Question asking whether this machine is a control or compute node."""

    _type = 'string'
    config_key = 'config.cluster.role'
    _question = "What is this machines' role? (control/compute)"
    _valid_roles = ('control', 'compute')
    interactive = True

    def _input_func(self, prompt):
        """Prompt for the role, giving the user three tries.

        The ``prompt`` argument is ignored; the prompt text is built from
        ``_question``.

        :returns: the chosen role, or None when not interactive.
        :raises InvalidAnswer: after three answers that are not a valid
            role.
        """
        if not self.interactive:
            return None

        tries_left = 3
        while tries_left > 0:
            answer = input("{} > ".format(self._question))
            if answer in self._valid_roles:
                return answer

            print('Role must be either "control" or "compute"')
            tries_left -= 1

        raise InvalidAnswer('Too many failed attempts.')
class Password(Question):
    """Question asking for the cluster password, with confirmation."""

    _type = 'string'  # TODO: type password support
    config_key = 'config.cluster.password'
    _question = 'Please enter a cluster password > '
    interactive = True

    def _input_func(self, prompt):
        """Read the password twice without echo, giving three tries.

        :returns: the password once both entries agree, or None when not
            interactive.
        :raises InvalidAnswer: after three mismatched pairs of entries.
        """
        if not self.interactive:
            return None

        # Get rid of 'default=' string the parent class has added to prompt.
        prompt = self._question

        tries = 0
        while tries < 3:
            first_entry = getpass(prompt)
            second_entry = getpass('Please re-enter password > ')
            if first_entry == second_entry:
                return first_entry

            print("Passwords don't match!")
            tries += 1

        raise InvalidAnswer('Too many failed attempts.')
class ControlIp(Question):
    """Question asking for the ip address of the control node."""

    _type = 'string'
    config_key = 'config.network.control-ip'
    _question = 'Please enter the ip address of the control node'
    interactive = True

    def _load(self):
        """Load the current value for this question.

        On a control node the result of ``fetch_ip_address()`` wins when
        it is truthy; in every other case the parent's ``_load`` decides.
        """
        role = check_output('snapctl', 'get', 'config.cluster.role')
        if role != 'control':
            return super()._load()

        detected = fetch_ip_address()
        if detected:
            return detected
        return super()._load()
class ComputeIp(Question):
    """Question asking for the ip address of this (compute) node."""

    _type = 'string'
    config_key = 'config.network.compute-ip'
    _question = 'Please enter the ip address of this node'
    interactive = True

    def _load(self):
        """Load the current value for this question.

        On a compute node the result of ``fetch_ip_address()`` wins when
        it is truthy; in every other case the parent's ``_load`` decides.
        """
        if check_output(
                'snapctl', 'get', 'config.cluster.role') == 'compute':
            # The fallback must be the parent's ``_load`` (as in every
            # other branch in this file), not ``load``.
            return fetch_ip_address() or super()._load()

        return super()._load()

    def ask(self):
        """Ask the question, unless this machine is a control node.

        On a control node the control ip is copied into this question's
        config key and nothing is asked.
        """
        # If we are a control node, skip this question.
        role = check_output('snapctl', 'get', Role.config_key)
        if role == 'control':
            ip = check_output('snapctl', 'get', ControlIp.config_key)
            check('snapctl', 'set', '{}={}'.format(self.config_key, ip))
            return

        return super().ask()
@ -0,0 +1,24 @@ | |||
#!/usr/bin/env python3 | |||
from init.shell import default_network, check | |||
from init.config import log # TODO name log. | |||
def main():
    """Store the default network's details in the snap config.

    Sets ``config.network.ext-gateway``, ``config.network.ext-cidr`` and
    ``config.network.control-ip`` from the values returned by
    ``default_network()``. If that lookup fails, the failure is logged and
    the config is left untouched.
    """
    try:
        ip, gate, cidr = default_network()
    except Exception:
        # TODO: more specific exception handling.
        log.exception(
            'Could not determine default network info. '
            'Falling back on 10.20.20.1')
        return

    # NOTE(review): control-ip used to be set twice with the same value;
    # the redundant call is dropped. If the second call was meant to set
    # a different key (e.g. compute-ip), add it back explicitly.
    settings = (
        ('config.network.ext-gateway', gate),
        ('config.network.ext-cidr', cidr),
        ('config.network.control-ip', ip),
    )
    for key, value in settings:
        check('snapctl', 'set', '{}={}'.format(key, value))
if __name__ == '__main__': | |||
main() |
@ -1,3 +1,4 @@ | |||
netaddr | |||
netifaces | |||
pymysql | |||
wget | |||
inflection |