Enable HA mode for mesos bay in Heat templates

Support a cluster of master nodes, which tolerates the failure of no
more than half of the nodes. The setup requires writing the IP
addresses of all master nodes into each node's configuration, which
is impossible with cloud-init. Therefore, this change replaces
cloud-init with SoftwareDeploymentGroup.

Using SoftwareDeployment requires building additional packages into
the image [1]. The image-building instructions have been updated
accordingly.

[1] http://docs.openstack.org/developer/heat/template_guide/
software_deployment.html#software-deployment-resources

Partially-Implements: blueprint mesos-multi-master-node
Change-Id: I34b85680803ea6070df94a6bccee3d677b8bfaab
This commit is contained in:
Hongbin Lu 2015-11-22 23:43:00 -05:00
parent 554af35b74
commit 64c5496536
9 changed files with 223 additions and 127 deletions

View File

@ -44,16 +44,19 @@ can be built and uploaded to glance as follows:
$ sudo apt-get update
$ sudo apt-get install git qemu-utils python-pip
$ sudo pip install pyyaml
$ git clone https://git.openstack.org/openstack/magnum
$ git clone https://git.openstack.org/openstack/diskimage-builder.git
$ git clone https://git.openstack.org/openstack/dib-utils.git
$ git clone https://git.openstack.org/openstack/tripleo-image-elements.git
$ git clone https://git.openstack.org/openstack/heat-templates.git
$ export PATH="${PWD}/dib-utils/bin:$PATH"
$ export ELEMENTS_PATH=magnum/magnum/templates/mesos/elements
$ export ELEMENTS_PATH=tripleo-image-elements/elements:heat-templates/hot/software-config/elements:magnum/magnum/templates/mesos/elements
$ export DIB_RELEASE=trusty
$ diskimage-builder/bin/disk-image-create ubuntu vm docker mesos \
os-collect-config os-refresh-config os-apply-config \
heat-config heat-config-script \
-o ubuntu-mesos.qcow2
$ glance image-create --name ubuntu-mesos --visibility public \
@ -94,6 +97,18 @@ your environment:
dns_nameserver: 8.8.8.8
server_image: ubuntu-mesos
The parameters above will create a stack with one master node. If you want to
create a stack with multiple master nodes (HA mode), create an environment
file like the one below:
::
parameters:
ssh_key_name: testkey
external_network: public
dns_nameserver: 8.8.8.8
server_image: ubuntu-mesos
number_of_masters: 3
And then create the stack, referencing that environment file:
::

View File

@ -1,32 +1,72 @@
#!/bin/sh
#!/bin/bash
. /etc/sysconfig/heat-params
echo "configuring mesos (master)"
mkdir -p /etc/marathon/conf
echo "Configuring mesos (master)"
# Set a ID for each master node
echo "1" > /etc/zookeeper/conf/myid
myip=$(ip addr show eth0 |
awk '$1 == "inet" {print $2}' | cut -f1 -d/)
# Append server IP address(es)
echo "
server.1=$MESOS_MASTER_IP:2888:3888
" >> /etc/zookeeper/conf/zoo.cfg
# Fix /etc/hosts
sed -i "s/127.0.1.1/$myip/" /etc/hosts
# List of Zookeeper URLs
echo "zk://$MESOS_MASTER_IP:2181/mesos" > /etc/mesos/zk
echo "zk://$MESOS_MASTER_IP:2181/mesos" > /etc/marathon/conf/master
echo "zk://$MESOS_MASTER_IP:2181/marathon" > /etc/marathon/conf/zk
######################################################################
#
# Configure ZooKeeper
#
# List all ZooKeeper nodes
id=1
for master_ip in $MESOS_MASTERS_IPS; do
echo "server.$((id++))=${master_ip}:2888:3888" >> /etc/zookeeper/conf/zoo.cfg
done
# Set an ID for this node
id=1
for master_ip in $MESOS_MASTERS_IPS; do
if [ "$master_ip" = "$myip" ]; then
break
fi
id=$((id+1))
done
echo "$id" > /etc/zookeeper/conf/myid
######################################################################
#
# Configure Mesos
#
# Set the ZooKeeper URL
zk="zk://"
for master_ip in $MESOS_MASTERS_IPS; do
zk="${zk}${master_ip}:2181,"
done
# Remove trailing ',' (format: zk://host1:port1,...,hostN:portN/path)
zk=${zk::-1}
echo "${zk}/mesos" > /etc/mesos/zk
# The IP address to listen on
echo "$myip" > /etc/mesos-master/ip
# The size of the quorum of replicas
echo "1" > /etc/mesos-master/quorum
echo "$QUORUM" > /etc/mesos-master/quorum
# The hostname the master should advertise in ZooKeeper
echo "$MESOS_MASTER_IP" > /etc/mesos-master/hostname
echo "$MESOS_MASTER_IP" > /etc/marathon/conf/hostname
# The hostname advertised in ZooKeeper
echo "$myip" > /etc/mesos-master/hostname
# The IP address to listen on
echo "$MESOS_MASTER_IP" > /etc/mesos-master/ip
# The IP address to listen on
# The cluster name
echo "$CLUSTER_NAME" > /etc/mesos-master/cluster
######################################################################
#
# Configure Marathon
#
mkdir -p /etc/marathon/conf
# Set the ZooKeeper URL
echo "${zk}/mesos" > /etc/marathon/conf/master
echo "${zk}/marathon" > /etc/marathon/conf/zk
# Set the hostname advertised in ZooKeeper
echo "$myip" > /etc/marathon/conf/hostname

View File

@ -1,14 +1,21 @@
#!/bin/sh
#!/bin/bash
. /etc/sysconfig/heat-params
echo "configuring mesos (slave)"
echo "Configuring mesos (slave)"
myip=$(ip addr show eth0 |
awk '$1 == "inet" {print $2}' | cut -f1 -d/)
# This specifies how to connect to a master or a quorum of masters
echo "zk://$MESOS_MASTER_IP:2181/mesos" > /etc/mesos/zk
zk=""
for master_ip in $MESOS_MASTERS_IPS; do
zk="${zk}${master_ip}:2181,"
done
# Remove last ','
zk=${zk::-1}
# Zookeeper URL. This specifies how to connect to a quorum of masters
# Format: zk://host1:port1,...,hostN:portN/path
echo "zk://${zk}/mesos" > /etc/mesos/zk
# The hostname the slave should report
echo "$myip" > /etc/mesos-slave/hostname

View File

@ -0,0 +1,8 @@
#!/bin/sh
# Persist the deployment inputs to /etc/sysconfig/heat-params so that later
# configuration scripts can source them.
#
# Reads from the environment:
#   MESOS_MASTERS_IPS  - IP addresses of the master nodes
#   CLUSTER_NAME       - name of the Mesos cluster
#   NUMBER_OF_MASTERS  - number of master nodes (treated as 1 when unset)
#
# Writes MESOS_MASTERS_IPS, CLUSTER_NAME and QUORUM to the params file.

mkdir -p /etc/sysconfig

# The heredoc delimiter is unquoted on purpose: the variables and the
# arithmetic below are expanded now, so the file contains literal values.
#
# QUORUM must be a strict majority of the masters (quorum > N/2), hence
# N/2 + 1 with integer division: 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3, 5 -> 3.
cat > /etc/sysconfig/heat-params <<EOF
MESOS_MASTERS_IPS="$MESOS_MASTERS_IPS"
CLUSTER_NAME="$CLUSTER_NAME"
QUORUM="$(( ${NUMBER_OF_MASTERS:-1} / 2 + 1 ))"
EOF

View File

@ -1,9 +0,0 @@
#cloud-config
merge_how: dict(recurse_array)+list(append)
write_files:
- path: /etc/sysconfig/heat-params
owner: "root:root"
permissions: "0644"
content: |
MESOS_MASTER_IP="$MESOS_MASTER_IP"
CLUSTER_NAME="$CLUSTER_NAME"

View File

@ -5,5 +5,5 @@ write_files:
owner: "root:root"
permissions: "0644"
content: |
MESOS_MASTER_IP="$MESOS_MASTER_IP"
MESOS_MASTERS_IPS="$MESOS_MASTERS_IPS"
EXECUTOR_REGISTRATION_TIMEOUT="$EXECUTOR_REGISTRATION_TIMEOUT"

View File

@ -64,6 +64,11 @@ parameters:
considering it hung and shutting it down
default: 5mins
number_of_masters:
type: number
description: how many mesos masters to spawn initially
default: 1
resources:
######################################################################
@ -108,22 +113,13 @@ resources:
- protocol: tcp
port_range_min: 22
port_range_max: 22
- protocol: tcp
remote_mode: remote_group_id
secgroup_mesos:
type: OS::Neutron::SecurityGroup
properties:
rules:
- protocol: tcp
port_range_min: 2181
port_range_max: 2181
- protocol: tcp
port_range_min: 2888
port_range_max: 2888
remote_mode: remote_group_id
- protocol: tcp
port_range_min: 3888
port_range_max: 3888
remote_mode: remote_group_id
- protocol: tcp
port_range_min: 5050
port_range_max: 5050
@ -131,6 +127,98 @@ resources:
port_range_min: 8080
port_range_max: 8080
######################################################################
#
# Master SoftwareConfig.
#
write_params_master:
type: OS::Heat::SoftwareConfig
properties:
group: script
config: {get_file: fragments/write-heat-params-master.sh}
inputs:
- name: MESOS_MASTERS_IPS
type: String
- name: CLUSTER_NAME
type: String
- name: QUORUM
type: String
configure_master:
type: OS::Heat::SoftwareConfig
properties:
group: script
config: {get_file: fragments/configure-mesos-master.sh}
start_services_master:
type: OS::Heat::SoftwareConfig
properties:
group: script
config: {get_file: fragments/start-services-master.sh}
######################################################################
#
# Master SoftwareDeployment.
#
write_params_master_deployment:
type: OS::Heat::SoftwareDeploymentGroup
properties:
config: {get_resource: write_params_master}
servers: {get_attr: [mesos_masters, attributes, mesos_server_id]}
input_values:
MESOS_MASTERS_IPS: {list_join: [' ', {get_attr: [mesos_masters, mesos_master_ip]}]}
CLUSTER_NAME: {get_param: cluster_name}
NUMBER_OF_MASTERS: {get_param: number_of_masters}
configure_master_deployment:
type: OS::Heat::SoftwareDeploymentGroup
depends_on:
- write_params_master_deployment
properties:
config: {get_resource: configure_master}
servers: {get_attr: [mesos_masters, attributes, mesos_server_id]}
start_services_master_deployment:
type: OS::Heat::SoftwareDeploymentGroup
depends_on:
- configure_master_deployment
properties:
config: {get_resource: start_services_master}
servers: {get_attr: [mesos_masters, attributes, mesos_server_id]}
######################################################################
#
# load balancers.
#
api_monitor:
type: OS::Neutron::HealthMonitor
properties:
type: TCP
delay: 5
max_retries: 5
timeout: 5
api_pool:
type: OS::Neutron::Pool
properties:
protocol: HTTP
monitors: [{get_resource: api_monitor}]
subnet: {get_resource: fixed_subnet}
lb_method: ROUND_ROBIN
vip:
protocol_port: 8080
api_pool_floating:
type: OS::Neutron::FloatingIP
depends_on:
- extrouter_inside
properties:
floating_network: {get_param: external_network}
port_id: {get_attr: [api_pool, vip, port_id]}
######################################################################
#
# Mesos masters. This is a resource group that creates number_of_masters masters.
@ -141,7 +229,7 @@ resources:
depends_on:
- extrouter_inside
properties:
count: 1
count: {get_param: number_of_masters}
resource_def:
type: mesosmaster.yaml
properties:
@ -149,12 +237,11 @@ resources:
server_image: {get_param: server_image}
master_flavor: {get_param: master_flavor}
external_network: {get_param: external_network}
cluster_name: {get_param: cluster_name}
wait_condition_timeout: {get_param: wait_condition_timeout}
fixed_network: {get_resource: fixed_network}
fixed_subnet: {get_resource: fixed_subnet}
secgroup_base_id: {get_resource: secgroup_base}
secgroup_mesos_id: {get_resource: secgroup_mesos}
api_pool_id: {get_resource: api_pool}
######################################################################
#
@ -166,7 +253,6 @@ resources:
type: OS::Heat::ResourceGroup
depends_on:
- extrouter_inside
- mesos_masters
properties:
count: {get_param: number_of_slaves}
resource_def:
@ -177,15 +263,16 @@ resources:
slave_flavor: {get_param: slave_flavor}
fixed_network: {get_resource: fixed_network}
fixed_subnet: {get_resource: fixed_subnet}
mesos_master_ip: {'Fn::Select': [0, {get_attr: [mesos_masters, mesos_master_ip]}]}
mesos_masters_ips: {list_join: [' ', {get_attr: [mesos_masters, mesos_master_ip]}]}
external_network: {get_param: external_network}
wait_condition_timeout: {get_param: wait_condition_timeout}
executor_registration_timeout: {get_param: executor_registration_timeout}
secgroup_base_id: {get_resource: secgroup_base}
outputs:
api_address:
value: {'Fn::Select': [0, {get_attr: [mesos_masters, mesos_master_external_ip]}]}
value: {get_attr: [api_pool_floating, floating_ip_address]}
description: >
This is the API endpoint of the Mesos master. Use this to access
the Mesos API from outside the cluster.

View File

@ -24,14 +24,6 @@ parameters:
type: string
description: uuid/name of a network to use for floating ip addresses
cluster_name:
type: string
description: human readable name for the mesos cluster
wait_condition_timeout:
type: number
description: timeout for the Wait Conditions
# The following are all generated in the parent template.
fixed_network:
type: string
@ -45,69 +37,12 @@ parameters:
secgroup_mesos_id:
type: string
description: ID of the security group for mesos master.
api_pool_id:
type: string
description: ID of the load balancer pool of Marathon.
resources:
master_wait_handle:
type: OS::Heat::WaitConditionHandle
master_wait_condition:
type: OS::Heat::WaitCondition
depends_on: mesos_master
properties:
handle: {get_resource: master_wait_handle}
timeout: {get_param: wait_condition_timeout}
######################################################################
#
# software configs. these are components that are combined into
# a multipart MIME user-data archive.
#
write_heat_params:
type: OS::Heat::SoftwareConfig
properties:
group: ungrouped
config:
str_replace:
template: {get_file: fragments/write-heat-params-master.yaml}
params:
"$MESOS_MASTER_IP": {get_attr: [mesos_master_eth0, fixed_ips, 0, ip_address]}
"$CLUSTER_NAME": {get_param: cluster_name}
configure_mesos:
type: OS::Heat::SoftwareConfig
properties:
group: ungrouped
config: {get_file: fragments/configure-mesos-master.sh}
start_services:
type: OS::Heat::SoftwareConfig
properties:
group: ungrouped
config: {get_file: fragments/start-services-master.sh}
master_wc_notify:
type: OS::Heat::SoftwareConfig
properties:
group: ungrouped
config:
str_replace:
template: |
#!/bin/bash -v
wc_notify --data-binary '{"status": "SUCCESS"}'
params:
wc_notify: {get_attr: [master_wait_handle, curl_cli]}
mesos_master_init:
type: OS::Heat::MultipartMime
properties:
parts:
- config: {get_resource: write_heat_params}
- config: {get_resource: configure_mesos}
- config: {get_resource: start_services}
- config: {get_resource: master_wc_notify}
######################################################################
#
# Mesos master server.
@ -119,8 +54,7 @@ resources:
image: {get_param: server_image}
flavor: {get_param: master_flavor}
key_name: {get_param: ssh_key_name}
user_data_format: RAW
user_data: {get_resource: mesos_master_init}
user_data_format: SOFTWARE_CONFIG
networks:
- port: {get_resource: mesos_master_eth0}
@ -141,6 +75,13 @@ resources:
floating_network: {get_param: external_network}
port_id: {get_resource: mesos_master_eth0}
api_pool_member:
type: OS::Neutron::PoolMember
properties:
pool_id: {get_param: api_pool_id}
address: {get_attr: [mesos_master_eth0, fixed_ips, 0, ip_address]}
protocol_port: 8080
outputs:
mesos_master_ip:
@ -148,3 +89,6 @@ outputs:
mesos_master_external_ip:
value: {get_attr: [mesos_master_floating, floating_ip_address]}
mesos_server_id:
value: {get_resource: mesos_master}

View File

@ -40,15 +40,18 @@ parameters:
default: 5mins
# The following are all generated in the parent template.
mesos_master_ip:
mesos_masters_ips:
type: string
description: IP address of the Mesos master server.
description: IP addresses of the Mesos master servers.
fixed_network:
type: string
description: Network from which to allocate fixed addresses.
fixed_subnet:
type: string
description: Subnet from which to allocate fixed addresses.
secgroup_base_id:
type: string
description: ID of the security group for base.
resources:
@ -84,7 +87,7 @@ resources:
str_replace:
template: {get_file: fragments/write-heat-params.yaml}
params:
"$MESOS_MASTER_IP": {get_param: mesos_master_ip}
"$MESOS_MASTERS_IPS": {get_param: mesos_masters_ips}
"$EXECUTOR_REGISTRATION_TIMEOUT": {get_param: executor_registration_timeout}
configure_mesos_slave:
@ -142,6 +145,7 @@ resources:
network: {get_param: fixed_network}
security_groups:
- get_resource: secgroup_all_open
- get_param: secgroup_base_id
fixed_ips:
- subnet: {get_param: fixed_subnet}
replacement_policy: AUTO