0deff5f1fe
Take number of retries into account, set default to 5. Change-Id: I820074d87b9e82a6b51cc63ecc70d0cc179a244c
594 lines
20 KiB
Python
Executable File
594 lines
20 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
# Copyright 2014 Huawei Technologies Co. Ltd
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""binary to deploy a cluster by compass client api."""
|
|
import logging
|
|
import re
|
|
import requests
|
|
import time
|
|
|
|
from compass.apiclient.restful import Client
|
|
from compass.utils import flags
|
|
from compass.utils import logsetting
|
|
|
|
|
|
flags.add('compass_server',
|
|
help='compass server url',
|
|
default='http://127.0.0.1/api')
|
|
flags.add('switch_ips',
|
|
help='comma seperated switch ips',
|
|
default='')
|
|
flags.add('switch_credential',
|
|
help='comma separated <credential key>=<credential value>',
|
|
default='version=v2c,community=public')
|
|
flags.add('switch_max_retries', type='int',
|
|
help='max retries of poll switch',
|
|
default=5)
|
|
flags.add('switch_retry_interval', type='int',
|
|
help='interval to repoll switch',
|
|
default=10)
|
|
flags.add_bool('poll_switches',
|
|
help='if the client polls switches',
|
|
default=True)
|
|
flags.add('machines',
|
|
help='comma separated mac addresses of machines',
|
|
default='')
|
|
flags.add('adapter_os_name',
|
|
help='adapter os name',
|
|
default=r'(?i)centos.*')
|
|
flags.add('adapter_target_system',
|
|
help='adapter target system name',
|
|
default='openstack')
|
|
flags.add('cluster_name',
|
|
help='cluster name',
|
|
default='cluster1')
|
|
flags.add('credentials',
|
|
help=(
|
|
'comma separated credentials formatted as '
|
|
'<credential_name>:<username>=<password>'
|
|
),
|
|
default=(
|
|
'server:root=root,service:service=service,'
|
|
'console:console=console'
|
|
))
|
|
flags.add('networking',
|
|
help=(
|
|
'semicomma seperated network property and its value '
|
|
'<network_property_name>=<value>'
|
|
),
|
|
default='')
|
|
flags.add('partitions',
|
|
help=(
|
|
'comma seperated partitions '
|
|
'<partition name>:<partition_type>=<partition_value>'
|
|
),
|
|
default='tmp:percentage=10,var:percentage=20,home:percentage=40')
|
|
flags.add('host_roles',
|
|
help=(
|
|
'semicomma separated host roles '
|
|
'<hostname>=<comma separated roles>',
|
|
),
|
|
default='')
|
|
flags.add('deployment_timeout',
|
|
help='deployment timeout in minutes',
|
|
default=60)
|
|
flags.add('progress_update_check_interval',
|
|
help='progress update status check interval in seconds',
|
|
default=60)
|
|
flags.add('dashboard_role',
|
|
help='dashboard role name',
|
|
default='os-dashboard')
|
|
flags.add('dashboard_link_pattern',
|
|
help='dashboard link pattern',
|
|
default=r'(?m)(http://\d+\.\d+\.\d+\.\d+:5000/v2\.0)')
|
|
|
|
|
|
def _get_client():
|
|
"""get apiclient object."""
|
|
return Client(flags.OPTIONS.compass_server)
|
|
|
|
|
|
def _get_machines(client):
|
|
"""get machines connected to the switch."""
|
|
status, resp = client.get_machines()
|
|
logging.info(
|
|
'get all machines status: %s, resp: %s', status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get machines'
|
|
raise Exception(msg)
|
|
|
|
machines_to_add = set([
|
|
machine for machine in flags.OPTIONS.machines.split(',')
|
|
if machine
|
|
])
|
|
logging.info('machines to add: %s', list(machines_to_add))
|
|
machines = {}
|
|
for machine in resp['machines']:
|
|
mac = machine['mac']
|
|
if mac in machines_to_add:
|
|
machines[machine['id']] = mac
|
|
|
|
logging.info('found machines: %s', machines.values())
|
|
|
|
if set(machines.values()) != machines_to_add:
|
|
msg = 'machines %s is missing' % (
|
|
list(machines_to_add - set(machines.values()))
|
|
)
|
|
raise Exception(msg)
|
|
|
|
return machines
|
|
|
|
|
|
def _poll_switches(client):
|
|
"""get all switches."""
|
|
status, resp = client.get_switches()
|
|
logging.info('get all switches status: %s resp: %s', status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get switches'
|
|
raise Exception(msg)
|
|
|
|
all_switches = {}
|
|
for switch in resp['switches']:
|
|
all_switches[switch['ip']] = switch
|
|
|
|
# add a switch.
|
|
switch_ips = [
|
|
switch_ip for switch_ip in flags.OPTIONS.switch_ips.split(',')
|
|
if switch_ip
|
|
]
|
|
switch_credential = dict([
|
|
credential.split('=', 1)
|
|
for credential in flags.OPTIONS.switch_credential.split(',')
|
|
if '=' in credential
|
|
])
|
|
for switch_ip in switch_ips:
|
|
if switch_ip not in all_switches:
|
|
status, resp = client.add_switch(switch_ip, **switch_credential)
|
|
logging.info('add switch %s status: %s resp: %s',
|
|
switch_ip, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to add switch %s' % switch_ip
|
|
raise Exception(msg)
|
|
|
|
all_switches[switch_ip] = resp['switch']
|
|
else:
|
|
logging.info('switch %s is already added', switch_ip)
|
|
|
|
remain_retries = flags.OPTIONS.switch_max_retries
|
|
while True:
|
|
time.sleep(flags.OPTIONS.switch_retry_interval)
|
|
for switch_ip, switch in all_switches.items():
|
|
switch_id = switch['id']
|
|
# if the switch is not in under_monitoring, wait for the
|
|
# poll switch task update the switch information and change
|
|
# the switch state.
|
|
logging.info(
|
|
'waiting for the switch %s into under_monitoring',
|
|
switch_ip)
|
|
status, resp = client.get_switch(switch_id)
|
|
logging.info('get switch %s status: %s, resp: %s',
|
|
switch_ip, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get switch %s' % switch_ip
|
|
raise Exception(msg)
|
|
|
|
switch = resp['switch']
|
|
all_switches[switch_ip] = switch
|
|
|
|
if switch['state'] == 'notsupported':
|
|
msg = 'switch %s is not supported', switch_ip
|
|
raise Exception(msg)
|
|
elif switch['state'] in ['initialized', 'repolling']:
|
|
logging.info('switch %s is not updated', switch_ip)
|
|
elif switch['state'] == 'under_monitoring':
|
|
logging.info('switch %s is ready', switch_ip)
|
|
try:
|
|
return _get_machines(client)
|
|
except Exception:
|
|
logging.error('failed to get all machines')
|
|
|
|
if remain_retries > 0:
|
|
for switch_ip, switch in all_switches.items():
|
|
status, resp = client.update_switch(
|
|
switch_id, switch_ip, **switch_credential)
|
|
if status >= 400:
|
|
msg = 'failed to update switch %s' % switch_ip
|
|
raise Exception(msg)
|
|
|
|
remain_retries -= 1
|
|
else:
|
|
msg = 'max retries reached'
|
|
raise Exception(msg)
|
|
|
|
|
|
def _get_adapter(client):
|
|
"""get adapter."""
|
|
status, resp = client.get_adapters()
|
|
logging.info('get all adapters status: %s, resp: %s', status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get adapters'
|
|
raise Exception(msg)
|
|
|
|
os_name_pattern = flags.OPTIONS.adapter_os_name
|
|
os_name_re = re.compile(os_name_pattern)
|
|
target_system = flags.OPTIONS.adapter_target_system
|
|
adapter_id = None
|
|
for adapter in resp['adapters']:
|
|
if (
|
|
os_name_re.match(adapter['os']) and
|
|
target_system == adapter['target_system']
|
|
):
|
|
adapter_id = adapter['id']
|
|
|
|
if not adapter_id:
|
|
msg = 'no adapter found for %s and %s' % (
|
|
os_name_pattern, target_system)
|
|
raise Exception(msg)
|
|
|
|
logging.info('adpater for deploying a cluster: %s', adapter_id)
|
|
return adapter_id
|
|
|
|
|
|
def _add_cluster(client, adapter_id, machines):
|
|
"""add a cluster."""
|
|
cluster_name = flags.OPTIONS.cluster_name
|
|
status, resp = client.add_cluster(
|
|
cluster_name=cluster_name, adapter_id=adapter_id)
|
|
logging.info('add cluster %s status: %s, resp: %s',
|
|
cluster_name, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to add cluster %s with adapter %s' % (
|
|
cluster_name, adapter_id)
|
|
raise Exception(msg)
|
|
|
|
cluster = resp['cluster']
|
|
cluster_id = cluster['id']
|
|
|
|
# add hosts to the cluster.
|
|
status, resp = client.add_hosts(
|
|
cluster_id=cluster_id,
|
|
machine_ids=machines.keys())
|
|
logging.info('add hosts to cluster %s status: %s, resp: %s',
|
|
cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to add machines %s to cluster %s' % (
|
|
machines, cluster_name)
|
|
raise Exception(msg)
|
|
|
|
host_ids = []
|
|
for host in resp['cluster_hosts']:
|
|
host_ids.append(host['id'])
|
|
|
|
logging.info('added hosts in cluster %s: %s', cluster_id, host_ids)
|
|
if len(host_ids) != len(machines):
|
|
msg = 'machines %s to add to the cluster %s while hosts %s' % (
|
|
machines, cluster_name, host_ids)
|
|
raise Exception(msg)
|
|
|
|
return {cluster_id: host_ids}
|
|
|
|
|
|
def _set_cluster_security(client, cluster_hosts):
|
|
"""set cluster security."""
|
|
credentials = [
|
|
credential for credential in flags.OPTIONS.credentials.split(',')
|
|
if ':' in credential
|
|
]
|
|
logging.info('set cluster security: %s', credentials)
|
|
credential_mapping = {}
|
|
for credential in credentials:
|
|
credential_name, username_and_password = credential.split(':', 1)
|
|
if not credential_name:
|
|
raise Exception('there is no credential name in %s' % credential)
|
|
|
|
if not username_and_password:
|
|
raise Exception('there is no username/password in %s' % credential)
|
|
|
|
if '=' not in username_and_password:
|
|
raise Exception('there is no = in %s' % username_and_password)
|
|
|
|
username, password = username_and_password.split('=', 1)
|
|
if not username or not password:
|
|
raise Exception(
|
|
'there is no username or password in %s' % (
|
|
username_and_password))
|
|
|
|
credential_mapping['%s_username' % credential_name] = username
|
|
credential_mapping['%s_password' % credential_name] = password
|
|
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
status, resp = client.set_security(
|
|
cluster_id, **credential_mapping)
|
|
logging.info(
|
|
'set security config to cluster %s status: %s, resp: %s',
|
|
cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to set security %s for cluster %s' % (
|
|
credential_mapping, cluster_id)
|
|
raise Exception(msg)
|
|
|
|
|
|
def _set_cluster_networking(client, cluster_hosts):
|
|
"""set cluster networking."""
|
|
networking_map = {}
|
|
networkings = [
|
|
network for network in flags.OPTIONS.networking.split(';')
|
|
if '=' in network
|
|
]
|
|
logging.info('set cluster networking: %s', networkings)
|
|
for networking in networkings:
|
|
networking_name, networking_value = networking.split('=', 1)
|
|
if not networking_name:
|
|
raise Exception(
|
|
'there is no networking name in %s' % networking)
|
|
|
|
if networking_name.endswith('_promisc'):
|
|
networking_map[networking_name] = int(networking_value)
|
|
else:
|
|
networking_map[networking_name] = networking_value
|
|
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
status, resp = client.set_networking(
|
|
cluster_id, **networking_map)
|
|
logging.info(
|
|
'set networking config %s to cluster %s status: %s, resp: %s',
|
|
networking_map, cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to set networking config %s to cluster %s' % (
|
|
networking_map, cluster_id)
|
|
raise Exception(msg)
|
|
|
|
|
|
def _set_cluster_partition(client, cluster_hosts):
|
|
"""set partiton of each host in cluster."""
|
|
partitions = [
|
|
partition for partition in flags.OPTIONS.partitions.split(',')
|
|
if ':' in partition
|
|
]
|
|
logging.info('set cluster partition: %s', partitions)
|
|
partiton_mapping = {}
|
|
for partition in partitions:
|
|
partition_name, partition_pair = partition.split(':', 1)
|
|
if not partition_name:
|
|
raise Exception(
|
|
'there is no partition name in %s' % partition)
|
|
|
|
if not partition_pair:
|
|
raise Exception(
|
|
'there is no partition pair in %s' % partition)
|
|
|
|
if '=' not in partition_pair:
|
|
raise Exception(
|
|
'there is no = in %s' % partition_pair)
|
|
|
|
partition_type, partition_value = partition_pair.split('=', 1)
|
|
if partition_type == 'percentage':
|
|
partition_value = int(partition_value)
|
|
elif partition_type == 'mbytes':
|
|
partition_value = int(partition_value)
|
|
else:
|
|
raise Exception(
|
|
'unsupported partition type %s' % partition_type)
|
|
|
|
partiton_mapping[
|
|
'%s_%s' % (partition_name, partition_type)
|
|
] = partition_value
|
|
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
status, resp = client.set_partition(
|
|
cluster_id, **partiton_mapping)
|
|
logging.info(
|
|
'set partition config %s to cluster %s status: %s, resp: %s',
|
|
partiton_mapping, cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to set partition %s to cluster %s' % (
|
|
partiton_mapping, cluster_id)
|
|
raise Exception(msg)
|
|
|
|
|
|
def _set_host_config(client, cluster_hosts):
|
|
host_configs = []
|
|
for host in flags.OPTIONS.host_roles.split(';'):
|
|
if not host:
|
|
continue
|
|
|
|
hostname, roles = host.split('=', 1)
|
|
if hostname:
|
|
roles = [role for role in roles.split(',') if role]
|
|
|
|
host_configs.append({
|
|
'hostname': hostname,
|
|
'roles': roles
|
|
})
|
|
|
|
total_hosts = 0
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
total_hosts += len(host_ids)
|
|
|
|
if total_hosts != len(host_configs):
|
|
msg = '%s host to assign but got %s host configs' % (
|
|
total_hosts, len(host_configs))
|
|
raise Exception(msg)
|
|
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
for hostid in host_ids:
|
|
host_config = host_configs.pop(0)
|
|
status, resp = client.update_host_config(
|
|
hostid, **host_config)
|
|
logging.info(
|
|
'set host %s config %s status: %s, resp: %s',
|
|
hostid, host_config, status, resp
|
|
)
|
|
if status >= 400:
|
|
msg = 'failed to set host %s config %s' % (
|
|
hostid, host_config)
|
|
raise Exception(msg)
|
|
|
|
|
|
def _deploy_clusters(client, cluster_hosts):
|
|
"""deploy cluster."""
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
status, resp = client.deploy_hosts(cluster_id)
|
|
logging.info(
|
|
'deploy cluster %s status: %s, resp: %s',
|
|
cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to deploy cluster %s' % cluster_id
|
|
raise Exception(msg)
|
|
|
|
|
|
def _get_installing_progress(client, cluster_hosts):
|
|
"""get intalling progress."""
|
|
timeout = time.time() + 60 * float(flags.OPTIONS.deployment_timeout)
|
|
clusters_progress = {}
|
|
hosts_progress = {}
|
|
install_finished = False
|
|
failed_hosts = {}
|
|
failed_clusters = {}
|
|
while time.time() < timeout:
|
|
found_installing_clusters = False
|
|
found_installing_hosts = False
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
for hostid in host_ids:
|
|
if hostid in hosts_progress:
|
|
continue
|
|
|
|
status, resp = client.get_host_installing_progress(hostid)
|
|
logging.info(
|
|
'get host %s installing progress status: %s, resp: %s',
|
|
hostid, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get host %s progress' % hostid
|
|
raise Exception(msg)
|
|
|
|
progress = resp['progress']
|
|
if (
|
|
progress['state'] not in ['UNINITIALIZED', 'INSTALLING'] or
|
|
progress['percentage'] >= 1.0
|
|
):
|
|
hosts_progress[hostid] = progress
|
|
if progress['state'] in ['ERROR']:
|
|
failed_hosts[hostid] = progress
|
|
|
|
else:
|
|
found_installing_hosts = True
|
|
|
|
if cluster_id in clusters_progress:
|
|
continue
|
|
|
|
status, resp = client.get_cluster_installing_progress(cluster_id)
|
|
logging.info(
|
|
'get cluster %s installing progress status: %s, resp: %s',
|
|
cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get cluster %s intsalling progress' % (
|
|
cluster_id)
|
|
raise Exception(msg)
|
|
|
|
progress = resp['progress']
|
|
if (
|
|
progress['state'] not in ['UNINITIALIZED', 'INSTALLING'] or
|
|
progress['percentage'] >= 1.0
|
|
):
|
|
clusters_progress[cluster_id] = progress
|
|
if progress['state'] in ['ERROR']:
|
|
failed_clusters[cluster_id] = progress
|
|
|
|
else:
|
|
found_installing_clusters = True
|
|
|
|
if found_installing_clusters and found_installing_hosts:
|
|
logging.info(
|
|
'there are some clusters/hosts in installing.'
|
|
'sleep %s seconds and retry',
|
|
flags.OPTIONS.progress_update_check_interval)
|
|
time.sleep(float(flags.OPTIONS.progress_update_check_interval))
|
|
else:
|
|
install_finished = True
|
|
logging.info('all clusters/hosts are installed.')
|
|
break
|
|
|
|
if not install_finished:
|
|
msg = 'installing %s is not all finished: hosts %s clusters %s' % (
|
|
cluster_hosts, hosts_progress, clusters_progress)
|
|
raise Exception(msg)
|
|
|
|
if failed_hosts:
|
|
msg = 'installing hosts failed: %s' % failed_hosts
|
|
raise Exception(msg)
|
|
|
|
if failed_clusters:
|
|
msg = 'installing clusters failed: %s' % failed_clusters
|
|
raise Exception(msg)
|
|
|
|
|
|
def _check_dashboard_links(client, cluster_hosts):
|
|
dashboard_role = flags.OPTIONS.dashboard_role
|
|
dashboard_link_pattern = re.compile(
|
|
flags.OPTIONS.dashboard_link_pattern)
|
|
for cluster_id, host_ids in cluster_hosts.items():
|
|
status, resp = client.get_dashboard_links(cluster_id)
|
|
logging.info(
|
|
'get cluster %s dashboard links status: %s, resp: %s',
|
|
cluster_id, status, resp)
|
|
if status >= 400:
|
|
msg = 'failed to get cluster %s dashboard links' % cluster_id
|
|
raise Exception(msg)
|
|
|
|
dashboardlinks = resp['dashboardlinks']
|
|
if dashboard_role not in dashboardlinks:
|
|
msg = 'no dashboard role %s found in %s' % (
|
|
dashboard_role, dashboardlinks)
|
|
raise Exception(msg)
|
|
|
|
r = requests.get(dashboardlinks[dashboard_role], verify=False)
|
|
r.raise_for_status()
|
|
match = dashboard_link_pattern.search(r.text)
|
|
if match:
|
|
logging.info(
|
|
'dashboard login page for cluster %s can be downloaded',
|
|
cluster_id)
|
|
else:
|
|
msg = (
|
|
'%s dashboard login page failed to be downloaded\n'
|
|
'the context is:\n%s\n'
|
|
) % (dashboard_role, r.text)
|
|
raise Exception(msg)
|
|
|
|
|
|
def main():
|
|
flags.init()
|
|
logsetting.init()
|
|
client = _get_client()
|
|
if flags.OPTIONS.poll_switches:
|
|
machines = _poll_switches(client)
|
|
else:
|
|
machines = _get_machines(client)
|
|
|
|
adapter_id = _get_adapter(client)
|
|
cluster_hosts = _add_cluster(client, adapter_id, machines)
|
|
_set_cluster_security(client, cluster_hosts)
|
|
_set_cluster_networking(client, cluster_hosts)
|
|
_set_cluster_partition(client, cluster_hosts)
|
|
_set_host_config(client, cluster_hosts)
|
|
_deploy_clusters(client, cluster_hosts)
|
|
_get_installing_progress(client, cluster_hosts)
|
|
_check_dashboard_links(client, cluster_hosts)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|