From 0ccd804849fc064ad309e02141c7a4b3dfd5fa7c Mon Sep 17 00:00:00 2001
From: Vladimir Kuklin
Date: Fri, 27 Mar 2015 18:23:47 +0300
Subject: [PATCH] Backward-compatible commit for packaging of fuel-library
 based on Change-Id: Ie759857fb94db9aa94aaeaeda2c6ab5bb159cc9e

This commit contains all the work done for fuel-library packaging. It should
be overridden by the change above after we switch CI to the package-based
approach.

implements blueprint: package-fuel-components

Change-Id: I48ed37a009b42f0a9a21cc869a869edb505b39c3
---
 debian/changelog | 17 +
 debian/compat | 1 +
 debian/control | 18 +
 debian/fuel-ha-utils.install | 5 +
 debian/fuel-misc.install | 2 +
 debian/rules | 10 +
 debian/source/format | 1 +
 .../ceilometer_ha/manifests/agent/central.pp | 4 +-
 .../manifests/alarm/evaluator.pp | 4 +-
 .../cluster/manifests/corosync/cs_service.pp | 12 +-
 .../puppet/cluster/manifests/haproxy_ocf.pp | 9 +-
 .../puppet/cluster/manifests/neutron.pp | 4 +-
 .../puppet/cluster/manifests/virtual_ip.pp | 2 +-
 .../puppet/docker/manifests/dockerctl.pp | 7 +-
 .../puppet/docker/templates/dockerctl.erb | 2 +-
 deployment/puppet/galera/manifests/init.pp | 6 +-
 deployment/puppet/haproxy/manifests/status.pp | 14 -
 deployment/puppet/heat_ha/manifests/engine.pp | 8 +-
 .../puppet/l23network/manifests/init.pp | 1 +
 .../puppet/nailgun/examples/host-only.pp | 1 +
 .../puppet/openstack/files/clustercheck | 76 +
 .../openstack/manifests/galera/status.pp | 16 +-
 .../templates/galera_clustercheck.erb | 74 -
 .../lib/facter/fuel_pkgs_exist.rb | 22 +
 .../modular/fuel_pkgs/fuel_pkgs.pp | 10 +
 .../modular/fuel_pkgs/tasks.yaml | 9 +
 .../osnailyfacter/modular/tools/tools.pp | 3 -
 .../modular/virtual_ips/conntrackd.pp | 2 +-
 files/fuel-docker-utils/dockerctl | 123 ++
 files/fuel-docker-utils/dockerctl-alias.sh | 2 +
 files/fuel-docker-utils/dockerctl_config | 132 ++
 files/fuel-docker-utils/functions.sh | 695 +++++++++
 .../get_service_credentials.py | 28 +
 .../ocf/ceilometer-agent-central | 351 +++++
 .../ocf/ceilometer-alarm-evaluator | 338 ++++
 files/fuel-ha-utils/ocf/haproxy | 281 ++++
 files/fuel-ha-utils/ocf/heat_engine_centos | 353 +++++
 files/fuel-ha-utils/ocf/heat_engine_ubuntu | 354 +++++
 files/fuel-ha-utils/ocf/mysql-wss | 701 +++++++++
 files/fuel-ha-utils/ocf/ns_IPaddr2 | 700 +++++++++
 files/fuel-ha-utils/ocf/ns_conntrackd | 378 +++++
 files/fuel-ha-utils/ocf/ns_dns | 300 ++++
 files/fuel-ha-utils/ocf/ns_haproxy | 555 +++++++
 files/fuel-ha-utils/ocf/ns_ntp | 440 ++++++
 .../fuel-ha-utils/ocf/ocf-neutron-dhcp-agent | 672 ++++++++
 files/fuel-ha-utils/ocf/ocf-neutron-l3-agent | 684 ++++++++
 .../ocf/ocf-neutron-metadata-agent | 366 +++++
 files/fuel-ha-utils/ocf/ocf-neutron-ovs-agent | 405 +++++
 files/fuel-ha-utils/ocf/rabbitmq | 1384 +++++++++++++++++
 files/fuel-ha-utils/tools/clustercheck | 76 +
 files/fuel-ha-utils/tools/q-agent-cleanup.py | 645 ++++++++
 files/fuel-ha-utils/tools/wsrepclustercheckrc | 12 +
 files/fuel-misc/centos_ifdown-local | 5 +
 files/fuel-misc/centos_ifup-local | 5 +
 .../fuel-misc/haproxy-status.sh | 2 +-
 specs/fuel-library6.1.spec | 168 ++
 56 files changed, 10371 insertions(+), 124 deletions(-)
 create mode 100644 debian/changelog
 create mode 100644 debian/compat
 create mode 100644 debian/control
 create mode 100644 debian/fuel-ha-utils.install
 create mode 100644 debian/fuel-misc.install
 create mode 100755 debian/rules
 create mode 100644 debian/source/format
 delete mode 100644 deployment/puppet/haproxy/manifests/status.pp
 create mode 100644 deployment/puppet/openstack/files/clustercheck
 create mode 100644 deployment/puppet/osnailyfacter/lib/facter/fuel_pkgs_exist.rb
 create mode 100644 deployment/puppet/osnailyfacter/modular/fuel_pkgs/fuel_pkgs.pp
 create mode 100644 deployment/puppet/osnailyfacter/modular/fuel_pkgs/tasks.yaml
 create mode 100644 files/fuel-docker-utils/dockerctl
 create mode 100644 files/fuel-docker-utils/dockerctl-alias.sh
 create mode 100644 files/fuel-docker-utils/dockerctl_config
 create mode 100644 files/fuel-docker-utils/functions.sh
 create mode 100644 files/fuel-docker-utils/get_service_credentials.py
 create mode 100644 files/fuel-ha-utils/ocf/ceilometer-agent-central
 create mode 100644 files/fuel-ha-utils/ocf/ceilometer-alarm-evaluator
 create mode 100755 files/fuel-ha-utils/ocf/haproxy
 create mode 100644 files/fuel-ha-utils/ocf/heat_engine_centos
 create mode 100644 files/fuel-ha-utils/ocf/heat_engine_ubuntu
 create mode 100644 files/fuel-ha-utils/ocf/mysql-wss
 create mode 100755 files/fuel-ha-utils/ocf/ns_IPaddr2
 create mode 100644 files/fuel-ha-utils/ocf/ns_conntrackd
 create mode 100644 files/fuel-ha-utils/ocf/ns_dns
 create mode 100755 files/fuel-ha-utils/ocf/ns_haproxy
 create mode 100644 files/fuel-ha-utils/ocf/ns_ntp
 create mode 100644 files/fuel-ha-utils/ocf/ocf-neutron-dhcp-agent
 create mode 100644 files/fuel-ha-utils/ocf/ocf-neutron-l3-agent
 create mode 100644 files/fuel-ha-utils/ocf/ocf-neutron-metadata-agent
 create mode 100644 files/fuel-ha-utils/ocf/ocf-neutron-ovs-agent
 create mode 100755 files/fuel-ha-utils/ocf/rabbitmq
 create mode 100644 files/fuel-ha-utils/tools/clustercheck
 create mode 100644 files/fuel-ha-utils/tools/q-agent-cleanup.py
 create mode 100644 files/fuel-ha-utils/tools/wsrepclustercheckrc
 create mode 100644 files/fuel-misc/centos_ifdown-local
 create mode 100644 files/fuel-misc/centos_ifup-local
 rename deployment/puppet/haproxy/templates/haproxy-status.sh.erb => files/fuel-misc/haproxy-status.sh (91%)
 create mode 100644 specs/fuel-library6.1.spec

diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000000..e92875517c
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,17 @@
+fuel-library6.1 (6.0.0-1) precise; urgency=low
+
+  * Update code from upstream
+
+ -- Igor Kalnitsky  Wed, 26 Nov 2014 19:49:00 +0200
+
+fuel-library6.1 (0.0.1-ubuntu1) precise; urgency=low
+
+  * Update code from upstream
+
+ -- OSCI Jenkins  Wed, 03 Sep 2014 15:20:13 +0400
+
+fuel-library6.1 (0.0.1) unstable; urgency=low
+
+  * Initial release.
+
+ -- Mirantis Product  Tue, 20 Aug 2013 22:20:46 +0400
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000000..7f8f011eb7
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+7
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000000..8ce361de2f
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,18 @@
+Source: fuel-library6.1
+Section: admin
+Priority: optional
+Maintainer: Mirantis Product
+Build-Depends: debhelper (>= 7), python-all
+Standards-Version: 3.9.2
+
+Package: fuel-ha-utils
+Architecture: all
+Depends: ${misc:Depends}, ${shlibs:Depends}, python-keystoneclient, python-neutronclient
+Description: Fuel Library HA utils
+ .
+
+Package: fuel-misc
+Architecture: all
+Depends: ${misc:Depends}, ${shlibs:Depends}
+Description: Misc Fuel library scripts
+ .
diff --git a/debian/fuel-ha-utils.install b/debian/fuel-ha-utils.install new file mode 100644 index 0000000000..f8e3e75ec6 --- /dev/null +++ b/debian/fuel-ha-utils.install @@ -0,0 +1,5 @@ +files/fuel-ha-utils/ocf/* /usr/lib/ocf/resource.d/fuel +files/fuel-ha-utils/tools/q-agent-cleanup.py /usr/bin +files/fuel-ha-utils/tools/wsrepclustercheckrc /etc +files/fuel-ha-utils/tools/clustercheck /usr/bin + diff --git a/debian/fuel-misc.install b/debian/fuel-misc.install new file mode 100644 index 0000000000..5693c0fbb7 --- /dev/null +++ b/debian/fuel-misc.install @@ -0,0 +1,2 @@ +files/fuel-misc/haproxy-status.sh /usr/bin + diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000000..31f44e59ef --- /dev/null +++ b/debian/rules @@ -0,0 +1,10 @@ +#!/usr/bin/make -f + +%: + dh $@ --with python2 + +override_dh_fixperms: + chmod 755 debian/fuel-ha-utils/usr/lib/ocf/resource.d/fuel/* + dh_fixperms + + diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000000..163aaf8d82 --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/deployment/puppet/ceilometer_ha/manifests/agent/central.pp b/deployment/puppet/ceilometer_ha/manifests/agent/central.pp index 9c78a510d9..94cf86ce8b 100644 --- a/deployment/puppet/ceilometer_ha/manifests/agent/central.pp +++ b/deployment/puppet/ceilometer_ha/manifests/agent/central.pp @@ -15,6 +15,6 @@ class ceilometer_ha::agent::central inherits ceilometer::agent::central { 'timeout' => '360', }, }, - ocf_script_file => 'cluster/ocf/ceilometer-agent-central', + ocf_script_file => 'cluster/ocf/ceilometer-agent-central', } -} \ No newline at end of file +} diff --git a/deployment/puppet/ceilometer_ha/manifests/alarm/evaluator.pp b/deployment/puppet/ceilometer_ha/manifests/alarm/evaluator.pp index 403f7beabf..c637050911 100644 --- a/deployment/puppet/ceilometer_ha/manifests/alarm/evaluator.pp +++ b/deployment/puppet/ceilometer_ha/manifests/alarm/evaluator.pp @@ -15,6 +15,6 @@ class ceilometer_ha::alarm::evaluator inherits ceilometer::alarm::evaluator { 'timeout' => '360', }, }, - ocf_script_file => 'cluster/ocf/ceilometer-alarm-evaluator', + ocf_script_file => 'cluster/ocf/ceilometer-alarm-evaluator', } -} \ No newline at end of file +} diff --git a/deployment/puppet/cluster/manifests/corosync/cs_service.pp b/deployment/puppet/cluster/manifests/corosync/cs_service.pp index 35784aa5e6..e04a5ca08c 100644 --- a/deployment/puppet/cluster/manifests/corosync/cs_service.pp +++ b/deployment/puppet/cluster/manifests/corosync/cs_service.pp @@ -24,7 +24,7 @@ define cluster::corosync::cs_service ( } # OCF script for pacemaker - file { $ocf_script : + file { $ocf_script : path => "/usr/lib/ocf/resource.d/fuel/${ocf_script}", mode => '0755', owner => root, @@ -55,11 +55,13 @@ define cluster::corosync::cs_service ( } } } - File[$ocf_script] -> Cs_resource["p_${service_name}"] -> Service[$service_true_title] - } else { - File[$ocf_script] -> Service[$service_true_title] + File <| title == $ocf_script |> -> Cs_resource["p_${service_name}"] + Cs_resource["p_${service_name}"] -> Service[$service_true_title] + } + else + { + File <| title == $ocf_script |> -> Service[$service_true_title] } - if ! 
$package_name { warning('Cluster::corosync::cs_service: Without package definition can\'t protect service for autostart correctly.') } else { diff --git a/deployment/puppet/cluster/manifests/haproxy_ocf.pp b/deployment/puppet/cluster/manifests/haproxy_ocf.pp index 93059ee2cf..98ac6ea5c3 100644 --- a/deployment/puppet/cluster/manifests/haproxy_ocf.pp +++ b/deployment/puppet/cluster/manifests/haproxy_ocf.pp @@ -15,9 +15,9 @@ class cluster::haproxy_ocf ( path =>'/usr/lib/ocf/resource.d/fuel/ns_haproxy', mode => '0755', owner => root, - group => root, - source => 'puppet:///modules/cluster/ocf/ns_haproxy', - } + group => root, + source => 'puppet:///modules/cluster/ocf/ns_haproxy', + } Anchor['haproxy'] -> File['haproxy-ocf'] File<| title == 'ocf-fuel-path' |> -> File['haproxy-ocf'] @@ -81,7 +81,7 @@ class cluster::haproxy_ocf ( if ($::osfamily == 'Debian') { file { '/etc/default/haproxy': content => 'ENABLED=0', - } -> File['haproxy-ocf'] + } -> File['haproxy-ocf'] if $::operatingsystem == 'Ubuntu' { file { '/etc/init/haproxy.override': ensure => 'present', @@ -98,6 +98,7 @@ class cluster::haproxy_ocf ( enable => false, } -> File['haproxy-ocf'] + sysctl::value { 'net.ipv4.ip_nonlocal_bind': value => '1' } -> diff --git a/deployment/puppet/cluster/manifests/neutron.pp b/deployment/puppet/cluster/manifests/neutron.pp index 259443bc57..499492e929 100644 --- a/deployment/puppet/cluster/manifests/neutron.pp +++ b/deployment/puppet/cluster/manifests/neutron.pp @@ -6,7 +6,7 @@ class cluster::neutron () { File<| title == 'ocf-mirantis-path' |> -> Package['neutron'] -> - file {'q-agent-cleanup.py': + file {'q-agent-cleanup.py': path => '/usr/bin/q-agent-cleanup.py', mode => '0755', owner => root, @@ -25,4 +25,4 @@ class cluster::neutron () { if !defined(Package['lsof']) { package { 'lsof': } } -} \ No newline at end of file +} diff --git a/deployment/puppet/cluster/manifests/virtual_ip.pp b/deployment/puppet/cluster/manifests/virtual_ip.pp index 1f9c07982d..61444fe194 100644 --- a/deployment/puppet/cluster/manifests/virtual_ip.pp +++ b/deployment/puppet/cluster/manifests/virtual_ip.pp @@ -20,7 +20,7 @@ define cluster::virtual_ip ( ){ $vip_name = "vip__${key}" - File['ns-ipaddr2-ocf'] -> Cs_resource[$vip_name] + File<| title == 'ns-ipaddr2-ocf' |> -> Cs_resource[$vip_name] cs_resource { $vip_name: ensure => present, diff --git a/deployment/puppet/docker/manifests/dockerctl.pp b/deployment/puppet/docker/manifests/dockerctl.pp index 3db3eefd54..b0889049ba 100644 --- a/deployment/puppet/docker/manifests/dockerctl.pp +++ b/deployment/puppet/docker/manifests/dockerctl.pp @@ -4,12 +4,13 @@ class docker::dockerctl ( $config_dir = '/etc/dockerctl', $profile_dir = '/etc/profile.d', $admin_ipaddress = $::fuel_settings['ADMIN_NETWORK']['ipaddress'], + $docker_engine = 'native', $release, $production, ) { # Make sure we have needed directories - file { [$bin_dir, $share_dir, $config_dir, $profile_dir]: + file { [$bin_dir, $share_dir, $config_dir, $profile_dir]: ensure => directory; } @@ -18,7 +19,7 @@ class docker::dockerctl ( mode => 0755, content => template("docker/dockerctl.erb"); } - + file { "$profile_dir/dockerctl.sh": content => template("docker/dockerctl-alias.sh.erb"), owner => 'root', @@ -29,7 +30,7 @@ class docker::dockerctl ( mode => 0755, content => template("docker/get_service_credentials.py.erb") } - file { "$share_dir/functions": + file { "$share_dir/functions.sh": mode => 0644, content => template("docker/functions.sh.erb") } diff --git a/deployment/puppet/docker/templates/dockerctl.erb 
b/deployment/puppet/docker/templates/dockerctl.erb index c4e56e41cf..33033773e1 100644 --- a/deployment/puppet/docker/templates/dockerctl.erb +++ b/deployment/puppet/docker/templates/dockerctl.erb @@ -16,7 +16,7 @@ confdir="<%= @config_dir %>" . "$confdir/config" -. "<%= @share_dir %>/functions" +. "<%= @share_dir %>/functions.sh" DEBUG=true #Sets var nonopts diff --git a/deployment/puppet/galera/manifests/init.pp b/deployment/puppet/galera/manifests/init.pp index 9888fc9cf5..d042ca95b7 100644 --- a/deployment/puppet/galera/manifests/init.pp +++ b/deployment/puppet/galera/manifests/init.pp @@ -191,13 +191,13 @@ class galera ( }, } Anchor['galera'] -> - File['mysql-wss-ocf'] -> + File['mysql-wss-ocf'] -> Cs_resource["p_${service_name}"] -> Service['mysql'] -> Exec['wait-for-synced-state'] } else { Anchor['galera'] -> - File['mysql-wss-ocf'] -> + File['mysql-wss-ocf'] -> Service['mysql'] } @@ -211,7 +211,7 @@ class galera ( File<| title == 'ocf-fuel-path' |> -> File['mysql-wss-ocf'] - Package['MySQL-server', 'galera'] -> File['mysql-wss-ocf'] + Package['MySQL-server', 'galera'] -> File['mysql-wss-ocf'] tweaks::ubuntu_service_override { 'mysql': package_name => 'MySQL-server', diff --git a/deployment/puppet/haproxy/manifests/status.pp b/deployment/puppet/haproxy/manifests/status.pp deleted file mode 100644 index 0ba7be0eed..0000000000 --- a/deployment/puppet/haproxy/manifests/status.pp +++ /dev/null @@ -1,14 +0,0 @@ -class haproxy::status ( - $haproxy_socket = '/var/lib/haproxy/stats', - $file = '/usr/local/bin/haproxy-status', -) { - - file { $file : - ensure => present, - mode => '0755', - owner => 'root', - group => 'root', - content => template('haproxy/haproxy-status.sh.erb'), - } - -} diff --git a/deployment/puppet/heat_ha/manifests/engine.pp b/deployment/puppet/heat_ha/manifests/engine.pp index bb8d0e0adf..a262b754e6 100644 --- a/deployment/puppet/heat_ha/manifests/engine.pp +++ b/deployment/puppet/heat_ha/manifests/engine.pp @@ -1,10 +1,10 @@ class heat_ha::engine inherits heat::engine { $primitive_type = 'heat-engine' - if $::osfamily == 'RedHat' { - $ocf_script_template = 'heat/heat_engine_centos.ocf.erb' - } else { - $ocf_script_template = 'heat/heat_engine_ubuntu.ocf.erb' + if $::osfamily == 'RedHat' { + $ocf_script_template = 'heat/heat_engine_centos.ocf.erb' + } else { + $ocf_script_template = 'heat/heat_engine_ubuntu.ocf.erb' } $metadata = { diff --git a/deployment/puppet/l23network/manifests/init.pp b/deployment/puppet/l23network/manifests/init.pp index ec63ca43f9..0d2a4b2919 100644 --- a/deployment/puppet/l23network/manifests/init.pp +++ b/deployment/puppet/l23network/manifests/init.pp @@ -52,6 +52,7 @@ class l23network ( Anchor <| title == 'l23network::l2::centos_upndown_scripts' |> -> Anchor['l23network::init'] } + Anchor['l23network::l2::init'] -> Anchor['l23network::init'] anchor { 'l23network::init': } diff --git a/deployment/puppet/nailgun/examples/host-only.pp b/deployment/puppet/nailgun/examples/host-only.pp index 4036a30a32..7cb7a42c91 100644 --- a/deployment/puppet/nailgun/examples/host-only.pp +++ b/deployment/puppet/nailgun/examples/host-only.pp @@ -47,6 +47,7 @@ class { 'docker::dockerctl': release => $::fuel_version['VERSION']['release'], production => $production, admin_ipaddress => $::fuel_settings['ADMIN_NETWORK']['ipaddress'], + docker_engine => 'native', } class { "docker": diff --git a/deployment/puppet/openstack/files/clustercheck b/deployment/puppet/openstack/files/clustercheck new file mode 100644 index 0000000000..580b864472 --- /dev/null +++ 
b/deployment/puppet/openstack/files/clustercheck @@ -0,0 +1,76 @@ +#!/bin/bash +# +# Script to make a proxy (ie HAProxy) capable of monitoring Percona XtraDB Cluster nodes properly +# +# Author: Olaf van Zandwijk +# Author: Raghavendra Prabhu +# +# Documentation and download: https://github.com/olafz/percona-clustercheck +# +# Based on the original script from Unai Rodriguez +# + +if [[ $1 == '-h' || $1 == '--help' ]];then + echo "Usage: $0 " + exit +fi + +[ -f /etc/wsrepclustercheckrc ] && . /etc/wsrepclustercheckrc + +if [[ -r $DEFAULTS_EXTRA_FILE ]];then + MYSQL_CMDLINE="mysql --defaults-extra-file=$DEFAULTS_EXTRA_FILE -nNE --connect-timeout=$TIMEOUT \ + --user=${MYSQL_USERNAME} --password=${MYSQL_PASSWORD} -h ${MYSQL_HOST} -P ${MYSQL_PORT}" +else + MYSQL_CMDLINE="mysql -nNE --connect-timeout=$TIMEOUT --user=${MYSQL_USERNAME} --password=${MYSQL_PASSWORD} \ + -h ${MYSQL_HOST} -P ${MYSQL_PORT}" +fi +# +# Perform the query to check the wsrep_local_state +# +WSREP_STATUS=$($MYSQL_CMDLINE -e "SHOW STATUS LIKE 'wsrep_local_state';" \ + 2>${ERR_FILE} | tail -1 2>>${ERR_FILE}) + +if [[ "${WSREP_STATUS}" == "4" ]] || [[ "${WSREP_STATUS}" == "2" && ${AVAILABLE_WHEN_DONOR} == 1 ]] +then + # Check only when set to 0 to avoid latency in response. + if [[ $AVAILABLE_WHEN_READONLY -eq 0 ]];then + READ_ONLY=$($MYSQL_CMDLINE -e "SHOW GLOBAL VARIABLES LIKE 'read_only';" \ + 2>${ERR_FILE} | tail -1 2>>${ERR_FILE}) + + if [[ "${READ_ONLY}" == "ON" ]];then + # Percona XtraDB Cluster node local state is 'Synced', but it is in + # read-only mode. The variable AVAILABLE_WHEN_READONLY is set to 0. + # => return HTTP 503 + # Shell return-code is 1 + echo -en "HTTP/1.1 503 Service Unavailable\r\n" + echo -en "Content-Type: text/plain\r\n" + echo -en "Connection: close\r\n" + echo -en "Content-Length: 43\r\n" + echo -en "\r\n" + echo -en "Percona XtraDB Cluster Node is read-only.\r\n" + sleep 0.1 + exit 1 + fi + fi + # Percona XtraDB Cluster node local state is 'Synced' => return HTTP 200 + # Shell return-code is 0 + echo -en "HTTP/1.1 200 OK\r\n" + echo -en "Content-Type: text/plain\r\n" + echo -en "Connection: close\r\n" + echo -en "Content-Length: 40\r\n" + echo -en "\r\n" + echo -en "Percona XtraDB Cluster Node is synced.\r\n" + sleep 0.1 + exit 0 +else + # Percona XtraDB Cluster node local state is not 'Synced' => return HTTP 503 + # Shell return-code is 1 + echo -en "HTTP/1.1 503 Service Unavailable\r\n" + echo -en "Content-Type: text/plain\r\n" + echo -en "Connection: close\r\n" + echo -en "Content-Length: 44\r\n" + echo -en "\r\n" + echo -en "Percona XtraDB Cluster Node is not synced.\r\n" + sleep 0.1 + exit 1 +fi diff --git a/deployment/puppet/openstack/manifests/galera/status.pp b/deployment/puppet/openstack/manifests/galera/status.pp index cff8f32d2a..84e2d8886c 100644 --- a/deployment/puppet/openstack/manifests/galera/status.pp +++ b/deployment/puppet/openstack/manifests/galera/status.pp @@ -81,11 +81,19 @@ class openstack::galera::status ( } } - file { '/usr/local/bin/clustercheck': + file { '/etc/wsrepclustercheckrc': content => template('openstack/galera_clustercheck.erb'), mode => '0755', } + file { '/usr/bin/clustercheck': + mode => '0755', + owner => root, + group => root, + source => "puppet:///modules/openstack/clustercheck", + require => File['/etc/wsrepclustercheckrc'], + } + augeas { 'galeracheck': context => '/files/etc/services', changes => [ @@ -94,7 +102,7 @@ class openstack::galera::status ( "set /files/etc/services/service-name[port = '${port}']/protocol tcp", "set 
/files/etc/services/service-name[port = '${port}']/#comment 'Galera Cluster Check'", ], - require => File['/usr/local/bin/clustercheck'], + require => File['/usr/bin/clustercheck'], } $group = $::osfamily ? { @@ -109,10 +117,10 @@ class openstack::galera::status ( port => $port, cps => '512 10', per_source => 'UNLIMITED', - server => '/usr/local/bin/clustercheck', + server => '/usr/bin/clustercheck', user => 'nobody', group => $group, flags => 'IPv4', - require => File['/usr/local/bin/clustercheck'], + require => File['/usr/bin/clustercheck'], } } diff --git a/deployment/puppet/openstack/templates/galera_clustercheck.erb b/deployment/puppet/openstack/templates/galera_clustercheck.erb index 090aa2ce69..9cb87825f5 100644 --- a/deployment/puppet/openstack/templates/galera_clustercheck.erb +++ b/deployment/puppet/openstack/templates/galera_clustercheck.erb @@ -1,19 +1,3 @@ -#!/bin/bash -# -# Script to make a proxy (ie HAProxy) capable of monitoring Percona XtraDB Cluster nodes properly -# -# Author: Olaf van Zandwijk -# Author: Raghavendra Prabhu -# -# Documentation and download: https://github.com/olafz/percona-clustercheck -# -# Based on the original script from Unai Rodriguez -# - -if [[ $1 == '-h' || $1 == '--help' ]];then - echo "Usage: $0 " - exit -fi MYSQL_USERNAME="<%= @status_user %>" MYSQL_PASSWORD="<%= @status_password %>" @@ -26,61 +10,3 @@ DEFAULTS_EXTRA_FILE=${6:-/etc/my.cnf} #Timeout exists for instances where mysqld may be hung TIMEOUT=<%= @backend_timeout %> - -if [[ -r $DEFAULTS_EXTRA_FILE ]];then - MYSQL_CMDLINE="mysql --defaults-extra-file=$DEFAULTS_EXTRA_FILE -nNE --connect-timeout=$TIMEOUT \ - --user=${MYSQL_USERNAME} --password=${MYSQL_PASSWORD} -h ${MYSQL_HOST} -P ${MYSQL_PORT}" -else - MYSQL_CMDLINE="mysql -nNE --connect-timeout=$TIMEOUT --user=${MYSQL_USERNAME} --password=${MYSQL_PASSWORD} \ - -h ${MYSQL_HOST} -P ${MYSQL_PORT}" -fi -# -# Perform the query to check the wsrep_local_state -# -WSREP_STATUS=$($MYSQL_CMDLINE -e "SHOW STATUS LIKE 'wsrep_local_state';" \ - 2>${ERR_FILE} | tail -1 2>>${ERR_FILE}) - -if [[ "${WSREP_STATUS}" == "4" ]] || [[ "${WSREP_STATUS}" == "2" && ${AVAILABLE_WHEN_DONOR} == 1 ]] -then - # Check only when set to 0 to avoid latency in response. - if [[ $AVAILABLE_WHEN_READONLY -eq 0 ]];then - READ_ONLY=$($MYSQL_CMDLINE -e "SHOW GLOBAL VARIABLES LIKE 'read_only';" \ - 2>${ERR_FILE} | tail -1 2>>${ERR_FILE}) - - if [[ "${READ_ONLY}" == "ON" ]];then - # Percona XtraDB Cluster node local state is 'Synced', but it is in - # read-only mode. The variable AVAILABLE_WHEN_READONLY is set to 0. 
- # => return HTTP 503 - # Shell return-code is 1 - echo -en "HTTP/1.1 503 Service Unavailable\r\n" - echo -en "Content-Type: text/plain\r\n" - echo -en "Connection: close\r\n" - echo -en "Content-Length: 43\r\n" - echo -en "\r\n" - echo -en "Percona XtraDB Cluster Node is read-only.\r\n" - sleep 0.1 - exit 1 - fi - fi - # Percona XtraDB Cluster node local state is 'Synced' => return HTTP 200 - # Shell return-code is 0 - echo -en "HTTP/1.1 200 OK\r\n" - echo -en "Content-Type: text/plain\r\n" - echo -en "Connection: close\r\n" - echo -en "Content-Length: 40\r\n" - echo -en "\r\n" - echo -en "Percona XtraDB Cluster Node is synced.\r\n" - sleep 0.1 - exit 0 -else - # Percona XtraDB Cluster node local state is not 'Synced' => return HTTP 503 - # Shell return-code is 1 - echo -en "HTTP/1.1 503 Service Unavailable\r\n" - echo -en "Content-Type: text/plain\r\n" - echo -en "Connection: close\r\n" - echo -en "Content-Length: 44\r\n" - echo -en "\r\n" - echo -en "Percona XtraDB Cluster Node is not synced.\r\n" - sleep 0.1 - exit 1 -fi diff --git a/deployment/puppet/osnailyfacter/lib/facter/fuel_pkgs_exist.rb b/deployment/puppet/osnailyfacter/lib/facter/fuel_pkgs_exist.rb new file mode 100644 index 0000000000..129af73277 --- /dev/null +++ b/deployment/puppet/osnailyfacter/lib/facter/fuel_pkgs_exist.rb @@ -0,0 +1,22 @@ +#FIXME(aglarendil): this fact is a temporary workaround +#FIXME: It should be removed after switching to +#FIXME: packages +require 'facter' +Facter.add('fuel_pkgs_exist') do + setcode do + rv = 'false' + case Facter.value('osfamily') + when /(?i)(debian)/ + pkg_grep_cmd = "apt-cache search" + when /(?i)(redhat)/ + pkg_grep_cmd = "yum list | grep" + end + + out1=Facter::Util::Resolution.exec("#{pkg_grep_cmd} fuel-ha-utils") + out2=Facter::Util::Resolution.exec("#{pkg_grep_cmd} fuel-misc") + if !out1.to_s.empty? and !out2.to_s.empty? 
+ rv = 'true' + end + rv + end +end diff --git a/deployment/puppet/osnailyfacter/modular/fuel_pkgs/fuel_pkgs.pp b/deployment/puppet/osnailyfacter/modular/fuel_pkgs/fuel_pkgs.pp new file mode 100644 index 0000000000..f4ee3d2ad7 --- /dev/null +++ b/deployment/puppet/osnailyfacter/modular/fuel_pkgs/fuel_pkgs.pp @@ -0,0 +1,10 @@ +notice('MODULAR: fuel_pkgs.pp') + +$fuel_packages=['fuel-ha-utils','fuel-misc'] +notify{"this is the place where ${fuel_packages} should be installed":} +#FIXME(algarendil): remove this if when we switch to pkg-based stuff +if $::fuel_pkgs_exist == 'true' +{ + package {$fuel_packages: ensure => latest } +} + diff --git a/deployment/puppet/osnailyfacter/modular/fuel_pkgs/tasks.yaml b/deployment/puppet/osnailyfacter/modular/fuel_pkgs/tasks.yaml new file mode 100644 index 0000000000..1ea5905dff --- /dev/null +++ b/deployment/puppet/osnailyfacter/modular/fuel_pkgs/tasks.yaml @@ -0,0 +1,9 @@ +- id: fuel_pkgs + type: puppet + groups: [primary-controller, controller, cinder, cinder-vmware, compute, ceph-osd, zabbix-server, primary-mongo, mongo] + requires: [deploy_start] + required_for: [hiera] + parameters: + puppet_manifest: /etc/puppet/modules/osnailyfacter/modular/fuel_pkgs/fuel_pkgs.pp + puppet_modules: /etc/puppet/modules + timeout: 600 diff --git a/deployment/puppet/osnailyfacter/modular/tools/tools.pp b/deployment/puppet/osnailyfacter/modular/tools/tools.pp index 97dd432020..6c3381e773 100644 --- a/deployment/puppet/osnailyfacter/modular/tools/tools.pp +++ b/deployment/puppet/osnailyfacter/modular/tools/tools.pp @@ -24,6 +24,3 @@ class { 'puppet::pull' : } $deployment_mode = hiera('deployment_mode') -if ($deployment_mode == 'ha') or ($deployment_mode == 'ha_compact') { - include haproxy::status -} diff --git a/deployment/puppet/osnailyfacter/modular/virtual_ips/conntrackd.pp b/deployment/puppet/osnailyfacter/modular/virtual_ips/conntrackd.pp index 41bccd2c3c..81db0db2fd 100644 --- a/deployment/puppet/osnailyfacter/modular/virtual_ips/conntrackd.pp +++ b/deployment/puppet/osnailyfacter/modular/virtual_ips/conntrackd.pp @@ -17,7 +17,7 @@ if $operatingsystem == 'Ubuntu' { content => template('cluster/conntrackd.conf.erb'), } -> - file { '/usr/lib/ocf/resource.d/fuel/ns_conntrackd': + file { '/usr/lib/ocf/resource.d/fuel/ns_conntrackd': mode => '0755', owner => root, group => root, diff --git a/files/fuel-docker-utils/dockerctl b/files/fuel-docker-utils/dockerctl new file mode 100644 index 0000000000..ea46af419b --- /dev/null +++ b/files/fuel-docker-utils/dockerctl @@ -0,0 +1,123 @@ +#!/bin/bash +# Copyright 2015 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +confdir="/etc/dockerctl" +. "$confdir/config" +. "/usr/share/dockerctl/functions.sh" +DEBUG=true + +#Sets var nonopts +declare -a nonopts +parse_options "$@" +set -- "${nonopts[@]}" + +if [ -z "$1" ] || [ "$1" = "help" ]; then + echo "Please specify a command." 
+ show_usage + exit 1 +fi + +if [ -z "$2" ] || [ "$2" = "all" ]; then + container="all" +else + container=$2 +fi + +container_seq=$CONTAINER_SEQUENCE + +if [ "$1" == "build" ]; then + if [ "$container" = "storage" ]; then + build_storage_containers + run_storage_containers + elif [ "$container" = "all" ];then + #Step 1: prepare storage containers + build_storage_containers + run_storage_containers + + #Prepare iptables just in case ICC is broken + allow_all_docker_traffic + + #Step 2: Launch all in order, checking each one + for service in $container_seq; do + start_container $service + check_ready $service + done + else + start_container $container + fi +elif [ "$1" == "start" ]; then + if [ "$container" = "all" ];then + for service in $container_seq; do + start_container $service + check_ready $service + sleep 4 + done + else + shift 2 + start_container $container $@ + if ! [[ "$@" =~ "--attach" ]]; then + check_ready $container + fi + fi +elif [ "$1" == "check" ]; then + if [ "$container" = "all" ];then + exit_code=0 + for service in $container_seq; do + check_ready $service || exit_code=1 + done + exit $exit_code + else + check_ready $container + fi +elif [ "$1" == "list" ]; then + shift 1 + list_containers "$@" +elif [ "$1" == "copy" ]; then + shift 1 + copy_files "$@" +elif [ "$1" == "restart" ]; then + shift 2 + restart_container $container $@ +elif [ "$1" == "stop" ]; then + shift 2 + stop_container $container $@ +elif [ "$1" == "revert" ]; then + shift 2 + revert_container $container $@ +elif [ "$1" == "shell" ]; then + shift 2 + shell_container $container "$@" +elif [ "$1" == "upgrade" ]; then + shift 2 + upgrade_container $container $@ +elif [ "$1" == "restore" ]; then + shift + restore "$@" +elif [ "$1" == "backup" ]; then + shift + backup "$@" +elif [ "$1" == "destroy" ]; then + shift 2 + destroy_container $container $@ +elif [ "$1" == "logs" ]; then + logs $container +elif [ "$1" == "post_start_hooks" ]; then + shift 2 + post_start_hooks "$container" "$@" +else + echo "Invalid selection." 
+ show_usage +fi diff --git a/files/fuel-docker-utils/dockerctl-alias.sh b/files/fuel-docker-utils/dockerctl-alias.sh new file mode 100644 index 0000000000..74965b7a48 --- /dev/null +++ b/files/fuel-docker-utils/dockerctl-alias.sh @@ -0,0 +1,2 @@ +alias cobbler='dockerctl shell cobbler cobbler' +alias mco='dockerctl shell astute mco' diff --git a/files/fuel-docker-utils/dockerctl_config b/files/fuel-docker-utils/dockerctl_config new file mode 100644 index 0000000000..e14069bb0d --- /dev/null +++ b/files/fuel-docker-utils/dockerctl_config @@ -0,0 +1,132 @@ +#!/bin/bash + +### Container information +#Paths +DOCKER_ROOT="/var/www/nailgun/docker" +IMAGE_DIR="${DOCKER_ROOT}/images" +SOURCE_DIR="${DOCKER_ROOT}/sources" +#SUPERVISOR_CONF_DIR="${docker_root}/supervisor.d" +#SUPERVISOR_CONF_DIR="${docker_root}/supervisor" +#SUPERVISOR_CONF_DIR="<%= @config_dir %>/supervisor/" +ASTUTE_YAML="/etc/fuel/astute.yaml" +DOCKER_ENGINE="native" +ADMIN_IP="10.20.0.2" +LOCAL_IP="127.0.0.1" + +#Version of Fuel to deploy +VERSION=$(awk '/release/{gsub(/"/, "");print $2}' /etc/fuel/version.yaml || echo <%= @release %>) +IMAGE_PREFIX="fuel" +# busybox image for storage containers +BUSYBOX_IMAGE="busybox.tar.gz" +# base centos image for all containers +BASE_IMAGE="centos.tar.gz" +# Order in which to start all containers +CONTAINER_SEQUENCE="postgres rabbitmq keystone rsync astute rsyslog nailgun ostf nginx cobbler mcollective" + +# storage container names +PREFIX_STORAGE="fuel-core-${VERSION}-volume-" + +# app container names +declare -A CONTAINER_NAMES +prefix="fuel-core-${VERSION}-" +CONTAINER_NAMES['cobbler']="${prefix}cobbler" +CONTAINER_NAMES['postgres']="${prefix}postgres" +CONTAINER_NAMES['rabbitmq']="${prefix}rabbitmq" +CONTAINER_NAMES['rsync']="${prefix}rsync" +CONTAINER_NAMES['astute']="${prefix}astute" +CONTAINER_NAMES['nailgun']="${prefix}nailgun" +CONTAINER_NAMES['ostf']="${prefix}ostf" +CONTAINER_NAMES['nginx']="${prefix}nginx" +CONTAINER_NAMES['rsyslog']="${prefix}rsyslog" +CONTAINER_NAMES['mcollective']="${prefix}mcollective" +CONTAINER_NAMES['keystone']="${prefix}keystone" + +# app container options +declare -A CONTAINER_OPTIONS +base_opts="-t --net=host" +FOREGROUND="-i" +BACKGROUND="-d" +BIND_ALL="" +BIND_ADMIN="${ADMIN_IP}:" +BIND_LOCAL="${LOCAL_IP}:" +CONTAINER_OPTIONS["cobbler"]="-p ${BIND_ALL}53:53/udp -p ${BIND_ALL}69:69/udp -p ${BIND_ALL}80:80 -p ${BIND_ALL}443:443 --privileged $base_opts" +CONTAINER_OPTIONS["postgres"]="-p ${BIND_ADMIN}5432:5432 -p ${BIND_LOCAL}5432:5432 $base_opts" +CONTAINER_OPTIONS["rabbitmq"]="-p ${BIND_ADMIN}5672:5672 -p ${BIND_LOCAL}5672:5672 -p ${BIND_ADMIN}4369:4369 -p ${BIND_LOCAL}4369:4369 -p ${BIND_ADMIN}15672:15672 -p ${BIND_LOCAL}15672:15672 -p ${BIND_ADMIN}61613:61613 -p ${BIND_LOCAL}61613:61613 $base_opts" +CONTAINER_OPTIONS["rsync"]="-p ${BIND_ADMIN}873:873 -p ${BIND_LOCAL}873:873 $base_opts" +CONTAINER_OPTIONS["astute"]="$base_opts" +CONTAINER_OPTIONS["nailgun"]="--privileged -p ${BIND_ADMIN}8001:8001 -p ${BIND_LOCAL}8001:8001 $base_opts" +CONTAINER_OPTIONS["ostf"]="-p ${BIND_ADMIN}8777:8777 -p ${BIND_LOCAL}8777:8777 $base_opts" +CONTAINER_OPTIONS["nginx"]="-p ${BIND_ALL}8000:8000 -p ${BIND_ALL}8080:8080 $base_opts" +CONTAINER_OPTIONS["rsyslog"]="--privileged -p ${BIND_ADMIN}514:514 -p ${BIND_LOCAL}514:514 -p ${BIND_ADMIN}514:514/udp -p ${BIND_LOCAL}514:514/udp -p ${BIND_ADMIN}25150:25150 -p ${BIND_LOCAL}25150:25150 $base_opts" +CONTAINER_OPTIONS["mcollective"]="--privileged $base_opts" +CONTAINER_OPTIONS["keystone"]="-p ${BIND_ALL}5000:5000 -p 
${BIND_ALL}35357:35357 $base_opts" + +###### +#Dependency of volumes for deployment +##### + +#Host volumes +declare -A HOST_VOL +HOST_VOL['repo']="-v /var/www/nailgun:/var/www/nailgun:rw -v /etc/yum.repos.d:/etc/yum.repos.d:rw" +HOST_VOL['puppet']="-v /etc/puppet:/etc/puppet:ro" +HOST_VOL['sshkey']="-v /root/.ssh:/root/.ssh:ro" +HOST_VOL['astuteyaml']="-v /etc/fuel:/etc/fuel:ro" +HOST_VOL['logs']="-v /var/log/docker-logs:/var/log" +HOST_VOL['keys']="-v /var/lib/fuel/keys:/var/lib/fuel/keys:rw" + +#Persistent directories for container data +HOST_VOL['astute_data']="-v /var/lib/fuel/container_data/$VERSION/astute:/var/lib/astute" +HOST_VOL['cobbler_data']="-v /var/lib/fuel/container_data/$VERSION/cobbler:/var/lib/cobbler" +HOST_VOL['postgres_data']="-v /var/lib/fuel/container_data/$VERSION/postgres:/var/lib/pgsql" + +#Storage container volume mounts +declare -A CONTAINER_VOLUMES +#Format: CONTAINER_VOLUMES[$FOO_CNT]="${HOST_VOL['foo']}" + +#Storage container volumes +#Format: FOO_VOL="--volumes-from $FOO_CNT" + +#Nailgun static files are in nailgun container +NAILGUN_VOL="--volumes-from ${CONTAINER_NAMES['nailgun']}" +COBBLER_VOL="--volumes-from ${CONTAINER_NAMES['cobbler']}" +ASTUTE_VOL="--volumes-from ${CONTAINER_NAMES['astute']}" + +#App container volume mounts +###requirements cheat sheet +###ALL: astuteyaml repo logs +###puppet: rsync +###sshkey: nailgun cobbler astute mcollective ostf +###repo: nginx mcollective +###nailgun: nginx +###cobbler: mcollective +###astute: mcollective +declare -A REQS +REQS["${HOST_VOL['astuteyaml']}"]="cobbler postgres rabbitmq rsync astute nailgun ostf nginx rsyslog mcollective keystone" +REQS["${HOST_VOL['keys']}"]="mcollective astute" +REQS["${HOST_VOL['repo']}"]="cobbler postgres rabbitmq rsync astute nailgun ostf nginx rsyslog mcollective keystone" +REQS["${HOST_VOL['logs']}"]="cobbler postgres rabbitmq rsync astute nailgun ostf nginx rsyslog mcollective keystone" +#Most containers work from local integrated puppet, but rsync needs to serve host puppet +REQS["${HOST_VOL['puppet']}"]="rsync" +#Uncomment to enable host puppet manifests +#REQS["${HOST_VOL['puppet']}"]="cobbler postgres rabbitmq rsync astute nailgun ostf nginx rsyslog mcollective keystone" +REQS["${HOST_VOL['astute_data']}"]="astute" +REQS["${HOST_VOL['cobbler_data']}"]="cobbler" +REQS["${HOST_VOL['postgres_data']}"]="postgres" +REQS["${HOST_VOL['sshkey']}"]="nailgun cobbler astute mcollective ostf" +REQS["$NAILGUN_VOL"]="nginx" +REQS["$COBBLER_VOL"]="mcollective" +REQS["$ASTUTE_VOL"]="mcollective" + +#assemble requirements +for requirement in "${!REQS[@]}"; do + for container in ${REQS["$requirement"]}; do + CONTAINER_VOLUMES["$container"]+="${requirement} " + done +done + +#backup settings +SYSTEM_DIRS="/etc/puppet /etc/fuel /var/lib/fuel /var/www/nailgun /root/.ssh" +BACKUP_ROOT="/var/backup/fuel" + +# number of retries for "docker check" +CHECK_RETRIES=240 diff --git a/files/fuel-docker-utils/functions.sh b/files/fuel-docker-utils/functions.sh new file mode 100644 index 0000000000..f7c3eb4ae6 --- /dev/null +++ b/files/fuel-docker-utils/functions.sh @@ -0,0 +1,695 @@ +#!/bin/bash +# Copyright 2015 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +if ${DEBUG}; then + DOCKER="docker -D" +else + DOCKER="docker" +fi + +function show_usage { + echo "Usage:" + echo " $0 command" + echo + echo "Available commands: (Note: work in progress)" + echo " help: show this message" + echo " build: create all Docker containers" + echo " list: list container short names (-l for more output)" + echo " start: start all Docker containers" + echo " restart: restart one or more Docker containers" + echo " stop: stop one or more Docker containers" + echo " shell: start a shell or run a command in a Docker container" + echo " logs: print console log from a container" + echo " revert: reset container to original state" + echo " destroy: destroy one or more containers" + echo " copy: copy files in or out of container" + echo " check: check of container is ready" + echo " backup: back up entire deployment" + echo " restore: restore backed up deployment" +} + +function parse_options { + opts="$@" + for opt in $@; do + case $opt in + -V|--version) VERSION=$2 + shift 2 + ;; + -d|--debug) DEBUG=true + shift + ;; + --nodebug) DEBUG=false + shift + ;; + --) shift + nonopts+=("$@") + return + ;; + help|build|start|check|list|copy|restart|stop|revert|shell|upgrade|restore|backup|destroy|logs|post_start_hooks) + nonopts+=("$@") + return + ;; + -*) echo "Unrecognized option: $opt" 1>&2 + exit 1 + ;; + *) nonopts+=("$opt") + ;; + esac + done +} +function debug { + if $DEBUG; then + echo $@ + fi +} +function build_image { + ${DOCKER} build -t $2 $1 +} + +function revert_container { + stop_container $1 + destroy_container $1 + start_container $1 +} + +function build_storage_containers { + #Format: build_image $SOURCE_DIR/storage-foo storage/foo + return 0 +} + +function retry_checker { + tries=0 + echo "checking with command \"$*\"" + until eval $*; do + rc=$? + ((tries++)) + echo "try number $tries" + echo "return code is $rc" + if [ $tries -gt $CHECK_RETRIES ];then + failure=1 + break + fi + sleep 1 + done +} + +function get_service_credentials { + credentialfile=$(mktemp /tmp/servicepws.XXXXX) + get_service_credentials.py $ASTUTE_YAML > $credentialfile + . $credentialfile + rm -f $credentialfile +} + +function check_ready { + #Uses a custom command to ensure a container is ready + get_service_credentials + failure=0 + echo "checking container $1" + + case $1 in + nailgun) retry_checker "shell_container nailgun supervisorctl status nailgun | grep -q RUNNING" ;; + ostf) retry_checker "egrep -q ^[2-4][0-9]? 
< <(curl --connect-timeout 1 -s -w '%{http_code}' http://$ADMIN_IP:8777/ostf/not_found -o /dev/null)" ;; + #NOTICE: Cobbler console tool does not comply unix conversation: 'cobbler profile find' always return 0 as exit code + cobbler) retry_checker "shell_container cobbler ps waux | grep -q 'cobblerd -F' && pgrep dnsmasq" + retry_checker "shell_container cobbler cobbler profile find --name=centos* | grep -q centos && shell_container cobbler cobbler profile find --name=ubuntu* | grep -q ubuntu && shell_container cobbler cobbler profile find --name=bootstrap* | grep -q bootstrap" ;; + rabbitmq) retry_checker "curl -f -L -i -u \"$astute_user:$astute_password\" http://$ADMIN_IP:15672/api/nodes 1>/dev/null 2>&1" + retry_checker "curl -f -L -u \"$mcollective_user:$mcollective_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'mcollective_broadcast'" + retry_checker "curl -f -L -u \"$mcollective_user:$mcollective_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'mcollective_directed'" ;; + postgres) retry_checker "shell_container postgres PGPASSWORD=$postgres_nailgun_password /usr/bin/psql -h $ADMIN_IP -U \"$postgres_nailgun_user\" \"$postgres_nailgun_dbname\" -c '\copyright' 2>&1 1>/dev/null" ;; + astute) retry_checker "shell_container astute ps waux | grep -q 'astuted'" + retry_checker "curl -f -L -u \"$astute_user:$astute_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'nailgun'" + retry_checker "curl -f -L -u \"$astute_user:$astute_password\" -s http://$ADMIN_IP:15672/api/exchanges | grep -qw 'naily_service'" ;; + rsync) retry_checker "shell_container rsync netstat -ntl | grep -q 873" ;; + rsyslog) retry_checker "shell_container rsyslog netstat -nl | grep -q 514" ;; + mcollective) retry_checker "shell_container mcollective ps waux | grep -q mcollectived" ;; + nginx) retry_checker "shell_container nginx ps waux | grep -q nginx" ;; + keystone) retry_checker "shell_container keystone keystone --os-auth-url \"http://$ADMIN_IP:35357/v2.0\" --os-username \"$keystone_nailgun_user\" --os-password \"$keystone_nailgun_password\" token-get &>/dev/null" ;; + *) echo "No defined test for determining if $1 is ready." + ;; + esac + + #Catch all to ensure puppet is not running + retry_checker "! shell_container $1 pgrep puppet" + + if [ $failure -eq 1 ]; then + echo "ERROR: $1 failed to start." + return 1 + else + echo "$1 is ready." 
+ return 0 + fi +} + + +function run_storage_containers { + #Run storage containers once + #Note: storage containers exit, but keep volumes available + #Example: + #${DOCKER} run -d ${CONTAINER_VOLUMES[$FOO_CNT]} --name "$FOO_CNT" storage/foo || true + return 0 +} + +function export_containers { + #--trim option removes $CNT_PREFIX from container name when exporting + if [[ "$1" == "--trim" ]]; then + trim=true + shift + else + trim=false + fi + + for image in $@; do + [ $trim ] && image=$(sed "s/${CNT_PREFIX}//" <<< "$image") + ${DOCKER} export $1 | gzip -c > "${image}.tar.gz" + done +} +function list_containers { + #Usage: + # (no option) short names + # -l (short and long names and status) + if [[ "$1" = "-l" ]]; then + printf "%-13s%-25s%-13s%-25s\n" "Name" "Image" "Status" "Full container name" + for container in "${!CONTAINER_NAMES[@]}"; do + if container_created $container; then + if is_running $container; then + running="Running" + else + running="Stopped" + fi + else + running="Not created" + fi + longname="${CONTAINER_NAMES["$container"]}" + imagename="${IMAGE_PREFIX}/${container}_${VERSION}" + printf "%-13s%-25s%-13s%-25s\n" "$container" "$imagename" "$running" "$longname" + done + else + for container in "${!CONTAINER_NAMES[@]}"; do + echo $container + done + fi +} + +function commit_container { + container_name="${CONTAINER_NAMES[$1]}" + image="$IMAGE_PREFIX/$1_$VERSION" + ${DOCKER} commit $container_name $image +} +function start_container { + if [ -z "$1" ]; then + echo "Must specify a container name" 1>&2 + exit 1 + fi + if [ "$1" = "all" ]; then + for container in $CONTAINER_SEQUENCE; do + start_container $container + done + return + fi + image_name="$IMAGE_PREFIX/$1" + container_name=${CONTAINER_NAMES[$1]} + if container_created "$container_name"; then + pre_start_hooks $1 + if is_running "$container_name"; then + if is_ghost "$container_name"; then + restart_container $1 + else + echo "$container_name is already running." + fi + else + # Clean up broken mounts if needed + id=$(get_container_id $container_name) + umount -l $(grep "$id" /proc/mounts | awk '{print $2}' | sort -r) 2>/dev/null + ${DOCKER} start $container_name + fi + post_start_hooks $1 + if [ "$2" = "--attach" ]; then + attach_container $container_name + fi + else + first_run_container "$1" $2 + fi + +} + +function shutdown_container { + echo "Stopping $1..." + kill $2 + ${DOCKER} stop $1 + exit 0 +} + +function attach_container { + echo "Attaching to container $1..." + ${DOCKER} attach --no-stdin $1 & + APID=$! + trap "shutdown_container $1 $APID" INT TERM + while test -d "/proc/$APID/fd" ; do + sleep 10 & wait $! + done +} + +function shell_container { + case $EXEC_DRIVER in + lxc) lxc_shell_container "$@" + ;; + *) exec_shell_container "$@" + esac +} + +function exec_shell_container { + exec_opts='' + #Interactive shell only if we have TTY + if [ -t 0 ]; then + exec_opts+=' -i' + else + #FIXME(mattymo): BASH 3.1.3 and higher don't need sleep + sleep 0.1 + if read -t 0; then + exec_opts+=' -i' + fi + fi + if [ -t 1 -a ! -p /proc/self/fd/0 ]; then + exec_opts+=' -t' + fi + id=$(get_container_id "$1") + if [ $? -ne 0 ]; then + echo "Could not get docker ID for $container. Is it running?" 
1>&2 + return 1 + fi + #TODO(mattymo): fix UTF-8 bash warning + #Setting C locale to suppress bash warning + prefix="env LANG=C" + if [ -z "$2" ]; then + command="/bin/bash" + else + shift + command=("$@") + fi + docker exec $exec_opts $id $prefix "${command[@]}" +} + +function lxc_shell_container { + id=$(get_container_id "$1") + if [ $? -ne 0 ]; then + echo "Could not get docker ID for $container. Is it running?" 1>&2 + return 1 + fi + if [ -z "$2" ]; then + command="/bin/bash" + else + shift + command=("$@") + fi + lxc-attach --name "$id" -- "${command[@]}" +} + +function stop_container { + if [[ "$1" == 'all' ]]; then + ${DOCKER} stop ${CONTAINER_NAMES[@]} + else + for container in $@; do + echo "Stopping $container..." + ${DOCKER} stop ${CONTAINER_NAMES[$container]} + done + fi +} + +function destroy_container { + if [[ "$1" == 'all' ]]; then + stop_container all + ${DOCKER} rm -f ${CONTAINER_NAMES[@]} + else + for container in $@; do + stop_container $container + ${DOCKER} rm -f ${CONTAINER_NAMES[$container]} + if [ $? -ne 0 ]; then + #This happens because devicemapper glitched + #Try to unmount all devicemapper mounts manually and try again + echo "Destruction of container $container failed. Trying workaround..." + id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' ${CONTAINER_NAMES[$container]}) + if [ -z $id ]; then + echo "Could not get docker ID for $container" 1>&2 + return 1 + fi + umount -l $(grep "$id" /proc/mounts | awk '{print $2}' | sort -r) + #Try to delete again + ${DOCKER} rm -f ${CONTAINER_NAMES[$container]} + if [ $? -ne 0 ];then + echo "Workaround failed. Unable to destroy container $container." + fi + fi + done + fi +} + +function logs { + ${DOCKER} logs ${CONTAINER_NAMES[$1]} +} + + + +function restart_container { + ${DOCKER} restart ${CONTAINER_NAMES[$1]} +} + +function container_lookup { + echo ${CONTAINER_NAMES[$1]} +} + +function get_container_id { + #Try to get ID from container short name first + id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' ${CONTAINER_NAMES[$1]} 2>/dev/null) + if [ -z "$id" ]; then + #Try to get ID short ID, long ID, or container name + id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' "$1") + if [ -z "$id" ]; then + echo "Could not get docker ID for container $1. Is it running?" 1>&2 + return 1 + fi + fi + echo "$id" +} +function container_created { + ${DOCKER} ps -a | grep -q $1 + return $? +} +function is_ghost { + LANG=C ${DOCKER} ps | grep $1 | grep -q Ghost + return $? +} +function is_running { + ${DOCKER} ps | grep -q $1 + return $? +} +function first_run_container { + + opts="${CONTAINER_OPTIONS[$1]} ${CONTAINER_VOLUMES[$1]}" + container_name="${CONTAINER_NAMES[$1]}" + image="$IMAGE_PREFIX/$1_$VERSION" + if ! is_running $container_name; then + pre_setup_hooks $1 + ${DOCKER} run $opts $BACKGROUND --name=$container_name $image + post_setup_hooks $1 + else + echo "$container_name is already running." + fi + if [ "$2" = "--attach" ]; then + attach_container $container_name + fi + return 0 +} + +function pre_setup_hooks { + return 0 +} + +function pre_start_hooks { + return 0 +} + +function post_setup_hooks { + case $1 in + *) ;; + esac +} +function post_start_hooks { + case $1 in + *) ;; + esac +} + +function container_root { + id=$(${DOCKER} inspect -f='{{if .ID}}{{.ID}}{{else}}{{.Id}}{{end}}' ${CONTAINER_NAMES[$1]}) + if [ -n "$id" ]; then + echo "/var/lib/docker/devicemapper/mnt/${id}/rootfs" + return 0 + else + echo "Unable to get root for container ${1}." 
1>&2 + return 1 + fi +} + +function copy_files { + #Overview: + # Works similar to rsync: + # Container to host: + # sync_files cobbler:/var/lib/tftpboot/ /localpath/ + # Host to container: + # sync_files /etc/puppet cobbler:/etc/puppet + #TODO(mattymo): add options and more parameters + + if [ -z "$2" ]; then + echo "This command requires two parameters. See usage:" + echo " $0 copy src dest" + echo + echo "Examples:" + echo " $0 copy nailgun:/etc/nailguns/settings.yaml /root/settings.yaml" + echo " $0 copy /root/newpkg.rpm mcollective:/root/" + exit 1 + fi + #Test which parameter is local + if test -n "$(shopt -s nullglob; echo $1*)"; then + method="push" + local=$1 + remote=$2 + else + method="pull" + remote=$1 + local=$2 + fi + container=$(echo $remote | cut -d':' -f1) + remotepath=$(echo $remote | cut -d':' -f2-) + if [[ ${CONTAINER_NAMES[@]} =~ .*${container}.* ]]; then + cont_root=$(container_root $container) + if [ $? -ne 0 ];then return 1; fi + else + echo "Unable to locate container to copy to/from." + return 2 + fi + remote="${cont_root}/${remotepath}" + if [ "$method" = "push" ]; then + cp -R $local $remote + else + cp -R $remote $local + fi +} + +function backup { + backup_id=$(date +%F_%H%S) + use_rsync=0 + #Sets backup_dir + parse_backup_dir $1 + [[ "$backup_dir" =~ var ]] && verify_disk_space "backup" + if check_nailgun_tasks; then + echo "There are currently running Fuel tasks. Please wait for them to \ +finish or cancel them." 1>&2 + exit 1 + fi + backup_containers "$backup_id" + backup_system_dirs + backup_compress + [ $use_rsync -eq 1 ] && backup_rsync_upload $rsync_dest $backup_dir + backup_cleanup $backup_dir + echo "Backup complete. File is available at $backup_dir/fuel_backup${image_suffix}.tar.lrz" +} + +function parse_backup_dir { + use_rsync=0 + if [ -z "$1" ]; then + #Default backup dir + backup_dir="${BACKUP_ROOT}/backup_${backup_id}" + elif [ -d "$1" ]; then + #User defined dir exists, so use it + backup_dir="$1" + elif [[ "$1" =~ .:. ]]; then + #Remote rsync dir + use_rsync=1 + backup_dir="${BACKUP_ROOT}/backup_${backup_id}" + rsync_dest="$1" + else + echo "Unrecognized backup destination. Valid options include:" 1>&2 + echo " (blank) - backup to $BACKUP_ROOT" 1>&2 + echo " /path/to/backup - local backup directory" 1>&2 + echo " user@server:/path - backup using rsync to server" 1>&2 + exit 1 + fi +} + +function backup_containers { +#Backs up all containers, regardless of being related to Fuel + + image_suffix="_$1" + purge_images=0 + + [ $purge_images -eq 0 ] && rm -rf "$backup_dir" + mkdir -p $SYSTEM_DIRS $backup_dir + echo "Reading container data..." + while read containerid; do + container_name="$(${DOCKER} inspect -f='{{.Name}}' $containerid | tr -d '/')" + container_image="$(${DOCKER} inspect -f='{{.Config.Image}}' $containerid)" + container_image+=$image_suffix + container_archive="$(echo "$container_image" | sed 's/\//__/').tar" + #Commit container as new image + echo "Committing $container_name..." + ${DOCKER} commit "$containerid" "${container_image}" + echo "Saving $container_name..." + ${DOCKER} save "${container_image}" > "${backup_dir}/${container_archive}" + echo "Cleaning up temporary image..." + ${DOCKER} rmi "${container_image}" + done < <(${DOCKER} ps -aq) +} + +function backup_system_dirs { + echo "Archiving system folders" + tar cf $backup_dir/system-dirs.tar -C / $SYSTEM_DIRS +} + +function backup_compress { + echo "Compressing archives..." 
+ component_tars=($backup_dir/*.tar) + ( cd $backup_dir && tar cf $backup_dir/fuel_backup${image_suffix}.tar *.tar ) + rm -rf "${component_tars[@]}" + #Improve compression on bare metal + if [ -z "$(virt-what)" ] ; then + lrzopts="-L2 -U" + else + lrzopts="-L2" + fi + lrzip $lrzopts "$backup_dir/fuel_backup${image_suffix}.tar" -o "$backup_dir/fuel_backup${image_suffix}.tar.lrz" + +} + +function backup_rsync_upload { + dest="$1" + backup_dir="$2" + echo "Starting rsync backup. You may be prompted for a login." + rsync -vP $backup_dir/*.tar.lrz "$dest" +} + +function backup_cleanup { + echo "Cleaning up..." + [ -d "$1" ] && rm -f $1/*.tar +} + +function check_nailgun_tasks { +#Returns 0 if tasks are running in nailgun + #if command returns error, then app is not running + shell_container nailgun fuel task &> /dev/null || return 1 + shell_container nailgun fuel task | grep -q running &> /dev/null + return $? +} + +function restore { +#TODO(mattymo): Optionally not include system dirs during restore +#TODO(mattymo): support remote file such as ssh://user@myhost/backup.tar.lrz +# or http://myhost/backup.tar.lrz + if check_nailgun_tasks; then + echo "There are currently running Fuel tasks. Please wait for them to \ +finish or cancel them. Run \"fuel task list\" for more details." 1>&2 + exit 1 + fi + verify_disk_space "restore" + backupfile=$1 + if [ -z "$backupfile" ]; then + #TODO(mattymo): Parse BACKUP_DIR for lrz files + echo "Specify a backup file to restore" 1>&2 + exit 1 + elif ! [ -f "$backupfile" ]; then + echo "Archive does not exist: $backupfile" 1>&2 + exit 1 + elif ! [[ "$backupfile" =~ lrz$ ]]; then + echo "Archive does not have lrz extension." 1>&2 + exit 2 + fi + timestamp=$(echo $backupfile | sed -n 's/.*\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]_[0-9][0-9][0-9][0-9]\).*/\1/p') + if [ -z "$timestamp" ]; then + echo "Unable to parse timestamp in archive." 1>&2 + exit 3 + fi + restoredir="$BACKUP_ROOT/restore-$timestamp/" + disable_supervisor + unpack_archive "$backupfile" "$restoredir" + restore_images "$restoredir" + rename_images "$timestamp" + restore_systemdirs "$restoredir" + echo "Stopping and destroying existing containers..." + destroy_container all + echo "Preparing storage containers..." + run_storage_containers + echo "Starting application containers..." + start_container all + enable_supervisor + for container in $CONTAINER_SEQUENCE; do + check_ready $container + done +} + +function unpack_archive { +#feedback as everything restores + backupfile="$1" + restoredir="$2" + mkdir -p "$restoredir" + lrzip -d -o "$restoredir/fuel_backup.tar" $backupfile + tar -xf "$restoredir/fuel_backup.tar" -C "$restoredir" && rm -f "$restoredir/fuel_backup.tar" +} + +function restore_images { + restoredir="$1" + for imgfile in $restoredir/*.tar; do + echo "Loading $imgfile..." + if ! [[ "$imgfile" =~ system-dirs ]] && ! 
[[ "$imgfile" =~ fuel_backup.tar ]]; then + ${DOCKER} load -i $imgfile + fi + #rm -f $imgfile + done +} + +function rename_images { + timestamp="$1" + while read containername; do + oldname=$containername + newname=$(echo $containername | sed -n "s/_${timestamp}//p") + docker tag "$oldname" "$newname" + docker rmi "$oldname" + done < <(docker images | grep $timestamp | cut -d' ' -f1) +} + +function restore_systemdirs { + restoredir="$1" + tar xf $restoredir/system-dirs.tar -C / +} + +function disable_supervisor { + supervisorctl shutdown +} + +function enable_supervisor { + service supervisord start +} +function verify_disk_space { + if [ -z "$1" ]; then + echo "Backup or restore operation not specified." 1>&2 + exit 1 + fi + + #11gb free space required to backup and restore + (( required = 11 * 1024 * 1024 )) + avail=$(df /var | grep /var | awk '{print $4}') + if (( avail < required )); then + echo "Insufficient disk space to perform $1. At least 11gb must be free on /var partition." 1>&2 + exit 1 + fi +} diff --git a/files/fuel-docker-utils/get_service_credentials.py b/files/fuel-docker-utils/get_service_credentials.py new file mode 100644 index 0000000000..c4ffdf4296 --- /dev/null +++ b/files/fuel-docker-utils/get_service_credentials.py @@ -0,0 +1,28 @@ +#!/usr/bin/python +# Copyright 2014 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +import sys +import yaml + +astuteyaml=sys.argv[1] +data=yaml.load(open(astuteyaml)) +for outerkey in data.keys(): + if isinstance(data[outerkey], dict): + for innerkey in data[outerkey].keys(): + print("%s_%s=\'%s\'" % (outerkey, innerkey, data[outerkey][innerkey])) + else: + print("%s=\'%s\'" % (outerkey, data[outerkey])) + diff --git a/files/fuel-ha-utils/ocf/ceilometer-agent-central b/files/fuel-ha-utils/ocf/ceilometer-agent-central new file mode 100644 index 0000000000..f962ca9466 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ceilometer-agent-central @@ -0,0 +1,351 @@ +#!/bin/sh +# +# +# OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) +# +# Description: Manages an OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) process as an HA resource +# +# Authors: Emilien Macchi +# Mainly inspired by the Nova Scheduler resource agent written by Sebastien Han +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_monitor_binary +# OCF_RESKEY_amqp_server_port +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +OCF_RESKEY_binary_default="ceilometer-agent-central" +OCF_RESKEY_config_default="/etc/ceilometer/ceilometer.conf" +OCF_RESKEY_user_default="ceilometer" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_amqp_server_port_default="5673" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) +May manage a ceilometer-agent-central instance or a clone set that +creates a distributed ceilometer-agent-central cluster. + +Manages the OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) + + + + +Location of the OpenStack Ceilometer Central Agent server binary (ceilometer-agent-central) + +OpenStack Ceilometer Central Agent server binary (ceilometer-agent-central) + + + + + +Location of the OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) configuration file + +OpenStack Ceilometer Central Agent (ceilometer-agent-central registry) config file + + + + + +User running OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) + +OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) user + + + + + +The pid file to use for this OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) instance + +OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) pid file + + + + + +The listening port number of the AMQP server. Use for monitoring purposes + +AMQP listening port + + + + + +Additional parameters to pass on to the OpenStack Ceilometer Central Agent Service (ceilometer-agent-central) + +Additional parameters for ceilometer-agent-central + + + + + + + + + + + + + + +END +} + +####################################################################### +# Functions invoked by resource manager actions + +ceilometer_agent_central_check_port() { +# This function has been taken from the squid RA and improved a bit +# The length of the integer must be 4 +# Examples of valid port: "1080", "0080" +# Examples of invalid port: "1080bad", "0", "0000", "" + + local int + local cnt + + int="$1" + cnt=${#int} + echo $int |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*' + + if [ $? -ne 0 ] || [ $cnt -ne 4 ]; then + ocf_log err "Invalid port number: $1" + exit $OCF_ERR_CONFIGURED + fi +} + +ceilometer_agent_central_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + ceilometer_agent_central_check_port $OCF_RESKEY_amqp_server_port + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? 
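+    # a non-zero getent status means the configured service user does not exist on this node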
+ if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +ceilometer_agent_central_status() { + local pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + if [ ! -f $OCF_RESKEY_pid ]; then + ocf_log info "OpenStack Ceilometer Central Agent (ceilometer-agent-central) is not running" + return $OCF_NOT_RUNNING + else + pid=`cat $OCF_RESKEY_pid` + fi + + if [ -n "${pid}" ]; then + ocf_run -warn kill -s 0 $pid + rc=$? + else + ocf_log err "PID file ${OCF_RESKEY_pid} is empty!" + return $OCF_ERR_GENERIC + fi + + if [ $rc -eq 0 ]; then + return $OCF_SUCCESS + else + ocf_log info "Old PID file found, but OpenStack Ceilometer Central Agent (ceilometer-agent-central) is not running" + return $OCF_NOT_RUNNING + fi +} + +ceilometer_agent_central_monitor() { + local rc + local pid + local scheduler_amqp_check + + ceilometer_agent_central_status + rc=$? + + # If status returned anything but success, return that immediately + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + ocf_log debug "OpenStack Ceilometer Central Agent (ceilometer-agent-central) monitor succeeded" + return $OCF_SUCCESS +} + +ceilometer_agent_central_start() { + local rc + + ceilometer_agent_central_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "OpenStack Ceilometer Central Agent (ceilometer-agent-central) already running" + return $OCF_SUCCESS + fi + + # run the actual ceilometer-agent-central daemon. Don't use ocf_run as we're sending the tool's output + # straight to /dev/null anyway and using ocf_run would break stdout-redirection here. + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid + + ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + # Spin waiting for the server to come up. + while true; do + ceilometer_agent_central_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "OpenStack Ceilometer Central Agent (ceilometer-agent-central) start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + ocf_log info "OpenStack Ceilometer Central Agent (ceilometer-agent-central) started" + return $OCF_SUCCESS +} + +ceilometer_agent_central_stop() { + local rc + local pid + + ceilometer_agent_central_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log info "OpenStack Ceilometer Central Agent (ceilometer-agent-central) already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`cat $OCF_RESKEY_pid` + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "OpenStack Ceilometer Central Agent (ceilometer-agent-central) couldn't be stopped" + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + ceilometer_agent_central_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "OpenStack Ceilometer Central Agent (ceilometer-agent-central) still hasn't stopped yet. Waiting ..." 
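+        # keep polling until the agent reports stopped or the shutdown timeout is exhausted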
+ done + + ceilometer_agent_central_status + rc=$? + if [ $rc -ne $OCF_NOT_RUNNING ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "OpenStack Ceilometer Central Agent (ceilometer-agent-central) failed to stop after ${shutdown_timeout}s \ + using SIGTERM. Trying SIGKILL ..." + ocf_run kill -s KILL $pid + fi + + ocf_log info "OpenStack Ceilometer Central Agent (ceilometer-agent-central) stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat $OCF_RESKEY_pid)" + rm -f $OCF_RESKEY_pid + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +ceilometer_agent_central_validate || exit $? + +# What kind of method was invoked? +case "$1" in + start) ceilometer_agent_central_start;; + stop) ceilometer_agent_central_stop;; + status) ceilometer_agent_central_status;; + monitor) ceilometer_agent_central_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/files/fuel-ha-utils/ocf/ceilometer-alarm-evaluator b/files/fuel-ha-utils/ocf/ceilometer-alarm-evaluator new file mode 100644 index 0000000000..7bde704608 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ceilometer-alarm-evaluator @@ -0,0 +1,338 @@ +#!/bin/sh +# +# +# OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) +# +# Description: Manages an OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) process as an HA resource +# +# Authors: Emilien Macchi +# Mainly inspired by the Nova Scheduler resource agent written by Sebastien Han +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_monitor_binary +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +OCF_RESKEY_binary_default="ceilometer-alarm-evaluator" +OCF_RESKEY_config_default="/etc/ceilometer/ceilometer.conf" +OCF_RESKEY_user_default="ceilometer" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) +May manage a ceilometer-alarm-evaluatorinstance or a clone set that +creates a distributed ceilometer-alarm-evaluator cluster. 
+ +Manages the OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) + + + + +Location of the OpenStack Ceilometer Alarm Evaluator server binary (ceilometer-alarm-evaluator) + +OpenStack Ceilometer Alarm Evaluator server binary (ceilometer-alarm-evaluator) + + + + + +Location of the OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) configuration file + +OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) config file + + + + + +User running OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) + +OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) user + + + + + +The pid file to use for this OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) instance + +OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) pid file + + + + + +Additional parameters to pass on to the OpenStack Ceilometer Alarm Evaluator Service (ceilometer-alarm-evaluator) + +Additional parameters for ceilometer-alarm-evaluator + + + + + + + + + + + + + + +END +} + +####################################################################### +# Functions invoked by resource manager actions + +ceilometer_alarm_evaluator_check_port() { +# This function has been taken from the squid RA and improved a bit +# The length of the integer must be 4 +# Examples of valid port: "1080", "0080" +# Examples of invalid port: "1080bad", "0", "0000", "" + + local int + local cnt + + int="$1" + cnt=${#int} + echo $int |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*' + + if [ $? -ne 0 ] || [ $cnt -ne 4 ]; then + ocf_log err "Invalid port number: $1" + exit $OCF_ERR_CONFIGURED + fi +} + +ceilometer_alarm_evaluator_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +ceilometer_alarm_evaluator_status() { + local pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + if [ ! -f $OCF_RESKEY_pid ]; then + ocf_log info "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) is not running" + return $OCF_NOT_RUNNING + else + pid=`cat $OCF_RESKEY_pid` + fi + + if [ -n "${pid}" ]; then + ocf_run -warn kill -s 0 $pid + rc=$? + else + ocf_log err "PID file ${OCF_RESKEY_pid} is empty!" + return $OCF_ERR_GENERIC + fi + + if [ $rc -eq 0 ]; then + return $OCF_SUCCESS + else + ocf_log info "Old PID file found, but OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) is not running" + return $OCF_NOT_RUNNING + fi +} + +ceilometer_alarm_evaluator_monitor() { + local rc + local pid + + ceilometer_alarm_evaluator_status + rc=$? 
+ + # If status returned anything but success, return that immediately + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + ocf_log debug "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) monitor succeeded" + return $OCF_SUCCESS +} + +ceilometer_alarm_evaluator_start() { + local rc + + ceilometer_alarm_evaluator_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) already running" + return $OCF_SUCCESS + fi + + # run the actual ceilometer-alarm-evaluator daemon. Don't use ocf_run as we're sending the tool's output + # straight to /dev/null anyway and using ocf_run would break stdout-redirection here. + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid + + ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + # Spin waiting for the server to come up. + while true; do + ceilometer_alarm_evaluator_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) start failed" + exit $OCF_ERR_GENERIC + fi + sleep 1 + done + + ocf_log info "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) started" + return $OCF_SUCCESS +} + +ceilometer_alarm_evaluator_stop() { + local rc + local pid + + ceilometer_alarm_evaluator_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log info "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`cat $OCF_RESKEY_pid` + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) couldn't be stopped" + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + ceilometer_alarm_evaluator_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) still hasn't stopped yet. Waiting ..." + done + + ceilometer_alarm_evaluator_status + rc=$? + if [ $rc -ne $OCF_NOT_RUNNING ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) failed to stop after ${shutdown_timeout}s \ + using SIGTERM. Trying SIGKILL ..." + ocf_run kill -s KILL $pid + fi + + ocf_log info "OpenStack Ceilometer Alarm Evaluator (ceilometer-alarm-evaluator) stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f $OCF_RESKEY_pid + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +ceilometer_alarm_evaluator_validate || exit $? + +# What kind of method was invoked? 
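+# Unknown actions fall through to usage and exit with OCF_ERR_UNIMPLEMENTED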
+case "$1" in + start) ceilometer_alarm_evaluator_start;; + stop) ceilometer_alarm_evaluator_stop;; + status) ceilometer_alarm_evaluator_status;; + monitor) ceilometer_alarm_evaluator_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/files/fuel-ha-utils/ocf/haproxy b/files/fuel-ha-utils/ocf/haproxy new file mode 100755 index 0000000000..c6537b98fc --- /dev/null +++ b/files/fuel-ha-utils/ocf/haproxy @@ -0,0 +1,281 @@ +#!/bin/bash +# +# Resource script for haproxy daemon +# +# Description: Manages haproxy daemon as an OCF resource in +# an High Availability setup. +# +# HAProxy OCF script's Author: Russki +# Rsync OCF script's Author: Dhairesh Oza +# License: GNU General Public License (GPL) +# +# +# usage: $0 {start|stop|status|monitor|validate-all|meta-data} +# +# The "start" arg starts haproxy. +# +# The "stop" arg stops it. +# +# OCF parameters: +# OCF_RESKEY_binpath +# OCF_RESKEY_conffile +# OCF_RESKEY_extraconf +# +# Note:This RA requires that the haproxy config files has a "pidfile" +# entry so that it is able to act on the correct process +########################################################################## +# Initialization: + +OCF_RESKEY_conffile_default="/etc/haproxy/haproxy.cfg" +OCF_RESKEY_pidfile_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_binpath_default="/usr/sbin/haproxy" + +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_binpath=${OCF_RESKEY_binpath_default}} +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} +. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs + +USAGE="Usage: $0 {start|stop|status|monitor|validate-all|meta-data}"; + +########################################################################## + +usage() +{ + echo $USAGE >&2 +} + +meta_data() +{ +cat < + + +1.0 + +This script manages haproxy daemon + +Manages an haproxy daemon + + + + + +The haproxy binary path. +For example, "/usr/sbin/haproxy" + +Full path to the haproxy binary + + + + + +The haproxy daemon configuration file name with full path. +For example, "/etc/haproxy/haproxy.cfg" + +Configuration file name with full path + + + + + +Extra command line arguments to pass to haproxy. +For example, "-f /etc/haproxy/shared.cfg" + +Extra command line arguments for haproxy + + + + + + + + + + + + + + +END +exit $OCF_SUCCESS +} + +get_variables() +{ + CONF_FILE="${OCF_RESKEY_conffile}" + COMMAND="${OCF_RESKEY_binpath}" + + if [ -n "${OCF_RESKEY_pidfile}" ]; then + PIDFILE=$(grep -v "#" ${CONF_FILE} | grep "pidfile" | sed 's/^[ \t]*pidfile[ \t]*//') + else + PIDFILE="${OCF_RESKEY_pidfile}" + fi +} + +haproxy_status() +{ + get_variables + + # check and make PID file dir + local PID_DIR="$( dirname ${PIDFILE} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR}" + mkdir -p "${PID_DIR}" + # no need to chown, root is user for haproxy + chmod 755 "${PID_DIR}" + fi + + if [ -n "${PIDFILE}" -a -f "${PIDFILE}" ]; then + # haproxy is probably running + # get pid from pidfile + PID="`cat ${PIDFILE}`" + if [ -n "${PID}" ]; then + # check if process exists + if ps -p "${PID}" | grep -q haproxy; then + ocf_log info "haproxy daemon running" + return $OCF_SUCCESS + else + ocf_log info "haproxy daemon is not running but pid file exists" + return $OCF_NOT_RUNNING + fi + else + ocf_log err "PID file empty!" 
+ return $OCF_ERR_GENERIC + fi + fi + # haproxy is not running + ocf_log info "haproxy daemon is not running" + return $OCF_NOT_RUNNING +} + +haproxy_start() +{ + get_variables + + # if haproxy is running return success + haproxy_status + retVal=$? + if [ $retVal -eq $OCF_SUCCESS ]; then + exit $OCF_SUCCESS + elif [ $retVal -ne $OCF_NOT_RUNNING ]; then + ocf_log err "Error. Unknown status." + exit $OCF_ERR_GENERIC + fi + + # run the haproxy binary + "${COMMAND}" ${OCF_RESKEY_extraconf} -f "${CONF_FILE}" -p "${PIDFILE}" + if [ $? -ne 0 ]; then + ocf_log err "Error. haproxy daemon returned error $?." + exit $OCF_ERR_GENERIC + fi + + ocf_log info "Started haproxy daemon." + exit $OCF_SUCCESS +} + +haproxy_reload() +{ + get_variables + if haproxy_status; then + # get pid from pidfile + PID="`cat ${PIDFILE}`" + # reload haproxy binary replacing the old process + "${COMMAND}" ${OCF_RESKEY_extraconf} -f "${CONF_FILE}" -p "${PIDFILE}" -sf "${PID}" + if [ $? -ne 0 ]; then + ocf_log err "Error. haproxy daemon returned error $?." + exit $OCF_ERR_GENERIC + fi + else + ocf_log info "Haproxy daemon is not running. Starting it." + haproxy_start + fi +} + +haproxy_stop() +{ + get_variables + if haproxy_status ; then + PID="`cat ${PIDFILE}`" + if [ -n "${PID}" ] ; then + kill "${PID}" + if [ $? -ne 0 ]; then + kill -SIGKILL "${PID}" + if [ $? -ne 0 ]; then + ocf_log err "Error. Could not stop haproxy daemon." + return $OCF_ERR_GENERIC + fi + fi + ocf_log debug "Delete pid file: ${PIDFILE} with content ${PID}" + rm -f "${PIDFILE}" + fi + fi + ocf_log info "Stopped haproxy daemon." + exit $OCF_SUCCESS +} + +haproxy_monitor() +{ + haproxy_status +} + +haproxy_validate_all() +{ + get_variables + if [ -n "$OCF_RESKEY_binpath" -a ! -x "$OCF_RESKEY_binpath" ]; then + ocf_log err "Binary path $OCF_RESKEY_binpath does not exist." + exit $OCF_ERR_ARGS + fi + if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then + ocf_log err "Config file $OCF_RESKEY_conffile does not exist." + exit $OCF_ERR_ARGS + fi + + if grep -v "^#" "$CONF_FILE" | grep "pidfile" > /dev/null ; then + : + else + ocf_log err "Error. \"pidfile\" entry required in the haproxy config file by haproxy OCF RA." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + + +# +# Main +# + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + start) haproxy_start + ;; + + stop) haproxy_stop + ;; + + reload) haproxy_reload + ;; + + status) haproxy_status + ;; + + monitor) haproxy_monitor + ;; + + validate-all) haproxy_validate_all + ;; + + meta-data) meta_data + ;; + + usage) usage; exit $OCF_SUCCESS + ;; + + *) usage; exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/files/fuel-ha-utils/ocf/heat_engine_centos b/files/fuel-ha-utils/ocf/heat_engine_centos new file mode 100644 index 0000000000..129309a72d --- /dev/null +++ b/files/fuel-ha-utils/ocf/heat_engine_centos @@ -0,0 +1,353 @@ +#!/bin/sh +# +# OpenStack Heat Engine OCF script +# +# Description: Manages OpenStack Heat Engine process as a HA resource +# +# Authors: Mirantis inc. +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_amqp_server_port +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +SERVICE_NAME="OpenStack Heat Engine" +LOG="/var/log/heat/heat-engine.log" + +OCF_RESKEY_binary_default="/usr/bin/heat-engine" +OCF_RESKEY_config_default="/etc/heat/heat.conf" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_amqp_server_port_default="5673" +OCF_RESKEY_user_default="heat" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Manages OpenStack Heat Engine daemon as a Pacemaker Resource. +Heat is used to deploy instances based on predefined templates. +There should be only one active instace of Heat Engine in an OpenStack cluster. +Heat Engine connects to its databse and uses AMQP service to communicate with +Heat API's processes ans REST interface to communicate with Nova API. + +Manages OpenStack Heat + + + + +Name of Heat's binary file that will be run. + +Heat binary file + + + + + +Path to configuration file used by Heat Engine daemon + +Heat configuration file + + + + + +User running Heat Engine + +Heat user + + + + + +The pid file to use for this process + +Heat pid file + + + + + +The listening port number of the AMQP server. Use for monitoring purposes + +AMQP listening port + + + + + +Additional parameters to pass on to the Heat engine if you do need them + +Additional parameters for the Heat engine + + + + + + + + + + + + + + +END +} + +####################################################################### +# Functions invoked by resource manager actions + +heat_engine_check_port() { +# This function has been taken from the squid RA and improved a bit +# The length of the integer must be 4 +# Examples of valid port: "1080", "0080" +# Examples of invalid port: "1080bad", "0", "0000", "" + + local int + local cnt + + int="$1" + cnt=${#int} + echo $int |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*' + + if [ $? -ne 0 ] || [ $cnt -ne 4 ]; then + ocf_log err "Invalid port number: $1" + exit $OCF_ERR_CONFIGURED + fi +} + +service_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + heat_engine_check_port $OCF_RESKEY_amqp_server_port + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +service_status() { + local pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + if [ ! -f $OCF_RESKEY_pid ]; then + ocf_log info "${SERVICE_NAME} is not running" + return $OCF_NOT_RUNNING + else + pid=`cat $OCF_RESKEY_pid` + fi + + if [ -n "${pid}" ]; then + ocf_run -warn kill -s 0 $pid + rc=$? 
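+        # kill -s 0 only probes for process existence; rc records whether the pid is still alive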
+ else + ocf_log err "PID file ${OCF_RESKEY_pid} is empty!" + return $OCF_ERR_GENERIC + fi + + if [ $rc -eq 0 ]; then + return $OCF_SUCCESS + else + ocf_log info "Old PID file found, but ${SERVICE_NAME} is not running" + return $OCF_NOT_RUNNING + fi +} + +service_monitor() { + local rc + local pid + local rc_amqp + local engine_amqp_check + + service_status + rc=$? + + # If status returned anything but success, return that immediately + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded" + return $OCF_SUCCESS +} + +service_start() { + local rc + + service_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "${SERVICE_NAME} is already running" + return $OCF_SUCCESS + fi + + # source init and venv + . /etc/rc.d/init.d/functions + # run the actual daemon. + daemon --user "${OCF_RESKEY_user}" --pidfile "${OCF_RESKEY_pid}" "${OCF_RESKEY_binary} --config-file ${OCF_RESKEY_config} --logfile ${LOG} &>/dev/null & echo \$! > ${OCF_RESKEY_pid}" + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + service_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "${SERVICE_NAME} start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + ocf_log info "${SERVICE_NAME} started" + return $OCF_SUCCESS +} + +service_stop() { + local rc + local pid + + service_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log info "${SERVICE_NAME} is already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`cat $OCF_RESKEY_pid` + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "${SERVICE_NAME} couldn't be stopped" + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + service_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "${SERVICE_NAME} still hasn't stopped yet. Waiting ..." + done + + service_status + rc=$? + if [ "${rc}" -ne "${OCF_NOT_RUNNING}" ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "${SERVICE_NAME} failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..." + ocf_run kill -s KILL "${pid}" + fi + + ocf_log info "${SERVICE_NAME} stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f "${OCF_RESKEY_pid}" + + return "${OCF_SUCCESS}" +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +service_validate || exit $? + +# What kind of method was invoked? +case "$1" in + start) service_start;; + stop) service_stop;; + status) service_status;; + monitor) service_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac + + diff --git a/files/fuel-ha-utils/ocf/heat_engine_ubuntu b/files/fuel-ha-utils/ocf/heat_engine_ubuntu new file mode 100644 index 0000000000..0cbe177417 --- /dev/null +++ b/files/fuel-ha-utils/ocf/heat_engine_ubuntu @@ -0,0 +1,354 @@ +#!/bin/sh +# +# OpenStack Heat Engine OCF script +# +# Description: Manages OpenStack Heat Engine process as a HA resource +# +# Authors: Mirantis inc. 
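+# Note: same logic as the CentOS agent; only the daemonization helper differs (start-stop-daemon here)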
+# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_amqp_server_port +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +SERVICE_NAME="OpenStack Heat Engine" +LOG="/var/log/heat/heat-engine.log" + +OCF_RESKEY_binary_default="/usr/bin/heat-engine" +OCF_RESKEY_config_default="/etc/heat/heat.conf" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_amqp_server_port_default="5673" +OCF_RESKEY_user_default="heat" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Manages OpenStack Heat Engine daemon as a Pacemaker Resource. +Heat is used to deploy instances based on predefined templates. +There should be only one active instace of Heat Engine in an OpenStack cluster. +Heat Engine connects to its databse and uses AMQP service to communicate with +Heat API's processes ans REST interface to communicate with Nova API. + +Manages OpenStack Heat + + + + +Name of Heat's binary file that will be run. + +Heat binary file + + + + + +Path to configuration file used by Heat Engine daemon + +Heat configuration file + + + + + +User running Heat Engine + +Heat user + + + + + +The pid file to use for this process + +Heat pid file + + + + + +The listening port number of the AMQP server. Use for monitoring purposes + +AMQP listening port + + + + + +Additional parameters to pass on to the Heat engine if you do need them + +Additional parameters for the Heat engine + + + + + + + + + + + + + + +END +} + +####################################################################### +# Functions invoked by resource manager actions + +heat_engine_check_port() { +# This function has been taken from the squid RA and improved a bit +# The length of the integer must be 4 +# Examples of valid port: "1080", "0080" +# Examples of invalid port: "1080bad", "0", "0000", "" + + local int + local cnt + + int="$1" + cnt=${#int} + echo $int |egrep -qx '[0-9]+(:[0-9]+)?(,[0-9]+(:[0-9]+)?)*' + + if [ $? -ne 0 ] || [ $cnt -ne 4 ]; then + ocf_log err "Invalid port number: $1" + exit $OCF_ERR_CONFIGURED + fi +} + +service_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + heat_engine_check_port $OCF_RESKEY_amqp_server_port + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? 
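+    # validation fails unless the heat user exists on this host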
+ if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +service_status() { + local pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + if [ ! -f $OCF_RESKEY_pid ]; then + ocf_log info "${SERVICE_NAME} is not running" + return $OCF_NOT_RUNNING + else + pid=`cat $OCF_RESKEY_pid` + fi + + if [ -n "${pid}" ]; then + ocf_run -warn kill -s 0 $pid + rc=$? + else + ocf_log err "PID file ${OCF_RESKEY_pid} is empty!" + return $OCF_ERR_GENERIC + fi + + if [ $rc -eq 0 ]; then + return $OCF_SUCCESS + else + ocf_log info "Old PID file found, but ${SERVICE_NAME} is not running" + return $OCF_NOT_RUNNING + fi +} + +service_monitor() { + local rc + local pid + local rc_amqp + local engine_amqp_check + + service_status + rc=$? + + # If status returned anything but success, return that immediately + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + ocf_log debug "OpenStack Orchestration Engine (heat-engine) monitor succeeded" + return $OCF_SUCCESS +} + +service_start() { + local rc + + service_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "${SERVICE_NAME} is already running" + return $OCF_SUCCESS + fi + + # source init and venv + . /lib/lsb/init-functions + # run the actual daemon. + start-stop-daemon --start --background --quiet --chuid "${OCF_RESKEY_user}:${OCF_RESKEY_user}" --make-pidfile --pidfile "${OCF_RESKEY_pid}" --startas "${OCF_RESKEY_binary}" + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + service_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "${SERVICE_NAME} start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + ocf_log info "${SERVICE_NAME} started" + return $OCF_SUCCESS +} + +service_stop() { + local rc + local pid + + service_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log info "${SERVICE_NAME} is already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`cat $OCF_RESKEY_pid` + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "${SERVICE_NAME} couldn't be stopped" + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + service_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "${SERVICE_NAME} still hasn't stopped yet. Waiting ..." + done + + service_status + rc=$? + if [ "${rc}" -ne "${OCF_NOT_RUNNING}" ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "${SERVICE_NAME} failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL ..." 
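+        # escalate to SIGKILL as a last resort once the graceful shutdown window has passed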
+ ocf_run kill -s KILL "${pid}" + fi + + ocf_log info "${SERVICE_NAME} stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f "${OCF_RESKEY_pid}" + + return "${OCF_SUCCESS}" +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +service_validate || exit $? + +# What kind of method was invoked? +case "$1" in + start) service_start;; + stop) service_stop;; + status) service_status;; + monitor) service_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac + + diff --git a/files/fuel-ha-utils/ocf/mysql-wss b/files/fuel-ha-utils/ocf/mysql-wss new file mode 100644 index 0000000000..e1e5f240a3 --- /dev/null +++ b/files/fuel-ha-utils/ocf/mysql-wss @@ -0,0 +1,701 @@ +#!/bin/bash +# Authors: Bartosz Kupidura (Mirantis): Rewrite RA to support mysql/galera +# Sergii Golovatiuk (Mirantis): Rewrite RA to support mysql/galera +# Alan Robertson: DB2 Script +# Jakub Janczak: rewrite as MySQL +# Andrew Beekhof: cleanup and import +# Sebastian Reitenbach: add OpenBSD defaults, more cleanup +# Narayan Newton: add Gentoo/Debian defaults +# Marian Marinov, Florian Haas: add replication capability +# Yves Trudeau, Baron Schwartz: add VIP support and improve replication +# +# Support: openstack@lists.launchpad.net +# License: GNU General Public License (GPL) +# +# (c) 2002-2005 International Business Machines, Inc. +# 2005-2010 Linux-HA contributors +# 2014 Mirantis Inc. +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +####################################################################### +# Fill in some defaults if no values are specified +OCF_RESKEY_binary_default="/usr/bin/mysqld_safe" +OCF_RESKEY_client_binary_default="/usr/bin/mysql" +OCF_RESKEY_config_default="/etc/my.cnf" +OCF_RESKEY_datadir_default="/var/lib/mysql" +OCF_RESKEY_user_default="mysql" +OCF_RESKEY_group_default="mysql" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_socket_default="/var/lib/mysql/mysql.sock" +OCF_RESKEY_test_user_default="root" +OCF_RESKEY_test_passwd_default="" +OCF_RESKEY_additional_parameters_default="" +OCF_RESKEY_master_timeout_default="300" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +MYSQL_BINDIR="$(dirname ${OCF_RESKEY_binary})" + +: ${OCF_RESKEY_client_binary=${OCF_RESKEY_client_binary_default}} + +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_datadir=${OCF_RESKEY_datadir_default}} + +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}} + +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_socket=${OCF_RESKEY_socket_default}} + +: ${OCF_RESKEY_test_user=${OCF_RESKEY_test_user_default}} +: ${OCF_RESKEY_test_passwd=${OCF_RESKEY_test_passwd_default}} + +: ${OCF_RESKEY_additional_parameters=${OCF_RESKEY_additional_parameters_default}} +: ${OCF_RESKEY_master_timeout=${OCF_RESKEY_master_timeout_default}} + +####################################################################### +# Convenience variables +MYSQL=$OCF_RESKEY_client_binary +HOSTNAME=$(uname -n) +MYSQL_OPTIONS_LOCAL="-S $OCF_RESKEY_socket --connect_timeout=10" +MYSQL_OPTIONS_TEST="$MYSQL_OPTIONS_LOCAL --user=$OCF_RESKEY_test_user --password=$OCF_RESKEY_test_passwd" +####################################################################### +usage() { + cat < + + + 0.1 + + Resource script for MySQL + + Resource script for MySQL + + + + Location of the MySQL server binary + + MySQL server binary + + + + + Location of the MySQL client binary + + MySQL client binary + + + + + Configuration file + + MySQL config + + + + + Directory containing databases + + Data directory + + + + + User running MySQL daemon + + MySQL user + + + + + Group running MySQL daemon (for logfile and directory permissions) + + MySQL group + + + + + The pidfile to be used for mysqld. + + MySQL pid file + + + + + The socket to be used for mysqld. + + MySQL socket + + + + + MySQL test user, must have select privilege on 'show status' + + MySQL test user + + + + + MySQL test user password + + MySQL test user password + + + + + Additional parameters which are passed to the mysqld on startup. + (e.g. --skip-external-locking or --skip-grant-tables) + + Additional parameters to pass to mysqld + + + + + How long we should wait for galera master. If master not come up before timeout, + RA will choose new master from already running nodes. This value can by changed by crm_attribute: + # crm_attribute --name galera_master_timeout --update 500 + Remember to remove this after maintenance. USE WITH CAUTION! + Remember to change timeout for start operation. Start timeout should be bigger than master_timeout + + Galera master timeout + + + + + + + + + + + +END +} +# Convenience functions +####################################################################### +nodes_in_cluster_online() { + local NODES + + NODES=$(crm_node --partition | sed -e '/(null)/d') + if [ ! 
-z "$NODES" ]; then + echo $NODES + else + echo + fi +} +nodes_in_cluster() { + local NODES + + #Ubuntu doesn't like \w + NODES=$(crm_node --list | awk '/^[a-zA-Z0-9]/ {print $2}' | sed -e '/(null)/d') + if [ ! -z "$NODES" ]; then + echo $NODES + else + echo + fi +} + +#Validate if GTID have correct format (return 0), else return 1 +#valid values are: +#XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:123 - standard cluster-id:commit-id +#XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX:-1 - standard non initialized cluster, 00000000-0000-0000-0000-000000000000:-1 +validate_gtid() { + local rc + local status_loglevel="err" + + if [ -z $1 ]; then + ocf_log $status_loglevel "No GTID provided" + return 1 + fi + + echo $1 | grep -q -E '^\w{8}-\w{4}-\w{4}-\w{4}-\w{12}:([[:digit:]]|-1)' + rc=$? + + if [ $rc -ne 0 ]; then + ocf_log $status_loglevel "GTID have wrong format: $1" + return 1 + else + ocf_log info "GTID OK: $1" + return 0 + fi +} + +#Get galera GTID from local mysql instance +update_node_gtid() { + local status_loglevel="err" + local GTID + + # Set loglevel to info during probe + if ocf_is_probe; then + status_loglevel="info" + fi + + if mysql_status $status_loglevel 1; then + CLUSTER_ID=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \ + -e "SHOW STATUS LIKE 'wsrep_local_state_uuid'" | awk '{print $NF}') + COMMIT_ID=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \ + -e "SHOW STATUS LIKE 'wsrep_last_committed'" | awk '{print $NF}') + GTID="$CLUSTER_ID:$COMMIT_ID" + else + GTID=$(cat ${OCF_RESKEY_datadir}/grastate.dat \ + | awk '/uuid/ { uuid = $NF} /seqno/ { seqno = $NF} END {print uuid":"seqno}') + fi + + #Final try to recover + if [ -z ${GTID} ]; then + GTID=$(${OCF_RESKEY_binary} --wsrep-recover \ + --log-error=/dev/stdout 2>&1 | grep 'Recovered position' | awk '{print $NF}') + fi + + if validate_gtid "$GTID"; then + ocf_log info "Galera GTID: ${GTID}" + crm_attribute --quiet --node $HOSTNAME --lifetime reboot --name gtid \ + --update $GTID + else + ocf_log info "Wrong GTID: ${GTID}, Removing" + crm_attribute --quiet --node $HOSTNAME --lifetime reboot --name gtid -D + fi +} + +get_master_timeout() { + local default=$OCF_RESKEY_master_timeout + local rc + local timeout=$(crm_attribute --quiet --name galera_master_timeout \ + --query -d $default -q | sed -e '/(null)/d') + rc=$? + + if [ $rc -eq 0 ]; then + echo $timeout + else + echo $default + fi +} + +#Get gtid attribute for $1 node, "0" means no GTID set or wrong format for GTID +get_node_gtid() { + local rc + local GTID + + GTID=$(crm_attribute --quiet --node $1 --lifetime reboot --query \ + --name gtid 2> /dev/null | sed -e '/(null)/d') + rc=$? + + if [ $rc -ne 0 -o -z "$GTID" ]; then + ocf_log info "No GTID for $1" + echo 0 + else + if validate_gtid "$GTID"; then + ocf_log info "Galera GTID: ${GTID}" + echo $GTID + else + ocf_log info "No GTID for $1" + echo 0 + fi + fi +} + +check_if_reelection_needed() { + local PARTITION_WITH_QUORUM=$(crm_node -q | sed -e '/(null)/d') + local RESOURCE_NAME=$(echo $OCF_RESOURCE_INSTANCE | cut -f1 -d":") + local NODE_COUNT=$(nodes_in_cluster | wc -w) + local RUNNING_INSTANCES + local rc + + if [ $PARTITION_WITH_QUORUM -eq 1 -o $NODE_COUNT -eq 1 ]; then + RUNNING_INSTANCES=$(crm_resource \ + --quiet --locate --resource $RESOURCE_NAME | sed -e '/(null)/d' | wc -l 2> /dev/null) + rc=$? 
+ if [ $rc -eq $OCF_SUCCESS -a $RUNNING_INSTANCES -lt 1 ]; then + return 1 + fi + fi + + return 0 +} + +choose_master() { + local NODES=$1 + local -A TMP + + for NODE in $NODES; do + NODE_ID=$(echo $NODE | md5sum | awk '{print $1}') + TMP[$NODE_ID]=$NODE + done + + MASTER=$(printf -- '%s\n' "${!TMP[@]}" | sort | head -1) + ocf_log info "Choosed master: ${TMP[$MASTER]}" + echo ${TMP[$MASTER]} +} + +get_possible_masters() { + local NODES=$* + local POSSIBLE_MASTERS + local -A TMP + local MASTER_GTID + local GTID + + for NODE in $NODES; do + GTID=$(get_node_gtid $NODE) + TMP[$NODE]=$(echo $GTID|cut -d":" -f 2) + done + + MASTER_GTID=$(printf -- '%s\n' "${TMP[@]}" | sort -r | head -1) + + for NODE in $NODES; do + if [ $MASTER_GTID -eq ${TMP[$NODE]} ]; then + POSSIBLE_MASTERS="$POSSIBLE_MASTERS $NODE" + fi + done + + ocf_log info "Possible masters: $POSSIBLE_MASTERS" + echo $POSSIBLE_MASTERS +} + +check_if_galera_pc() { + local NODES + local MASTERS + local timeout=$(get_master_timeout) + + ocf_log info "Checking if Primary Component" + + while [ $timeout -gt 0 ]; do + NODES=$(nodes_in_cluster_online) + MASTERS=$(get_possible_masters "$NODES") + MASTER=$(choose_master "$MASTERS") + if [ "$MASTER" = "$HOSTNAME" ]; then + ocf_log info "I\'m Primary Component. Join me!" + return 1 + fi + + if check_if_reelection_needed; then + ocf_log info "My neighbour is Primary Component" + return 0 + fi + + sleep 10 + (( timeout -= 10 )) + ocf_log info "Waiting for master. ${timeout} seconds left" + done + + ocf_log info "${HOSTNAME} is not Primary Component" + return 0 +} +# Functions invoked by resource manager actions + +mysql_validate() { + check_binary $OCF_RESKEY_binary + check_binary $OCF_RESKEY_client_binary + + if [ ! -f $OCF_RESKEY_config ]; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist"; + return $OCF_ERR_INSTALLED; + fi + + if [ ! -d $OCF_RESKEY_datadir ]; then + ocf_log err "Datadir $OCF_RESKEY_datadir doesn't exist"; + return $OCF_ERR_INSTALLED; + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + if [ ! $? -eq 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exit"; + return $OCF_ERR_INSTALLED; + fi + + getent group $OCF_RESKEY_group >/dev/null 2>&1 + if [ ! $? -eq 0 ]; then + ocf_log err "Group $OCF_RESKEY_group doesn't exist"; + return $OCF_ERR_INSTALLED; + fi + + return $OCF_SUCCESS +} + +mysql_status() { + i=${2:-3} + sleeptime=${3:-5} + while [ $i -gt 0 ]; do + if [ -f "$OCF_RESKEY_pid" ]; then + break + fi + sleep $sleeptime + (( i-- )) + ocf_log info "PIDFile ${OCF_RESKEY_pid} of MySQL server not found. Sleeping for $sleeptime seconds. ${i} retries left" + done + + + if [ $i -eq 0 ]; then + ocf_log $1 "MySQL is not running" + return $OCF_NOT_RUNNING; + fi + + pid=$(cat $OCF_RESKEY_pid); + if [ -d /proc -a -d /proc/1 ]; then + [ "u$pid" != "u" -a -d /proc/$pid ] + else + kill -s 0 $pid >/dev/null 2>&1 + fi + + if [ $? -eq 0 ]; then + return $OCF_SUCCESS; + else + ocf_log $1 "MySQL is not running" + return $OCF_ERR_GENERIC; + fi +} + +mysql_monitor() { + local rc + local status_loglevel="err" + local WSREP_CONNECTED + local WSREP_LOCAL_STATE_COMMENT + local WSREP_READY + + # Set loglevel to info during probe + if ocf_is_probe; then + status_loglevel="info" + fi + + update_node_gtid + + mysql_status $status_loglevel + rc=$? + + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + WSREP_CONNECTED=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \ + -e "SHOW STATUS LIKE 'wsrep_connected'" | awk '{print $NF}') + rc=$? 
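+    # wsrep_connected must report ON; anything else means this node has dropped out of the galera cluster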
+ + if [ $rc -ne 0 -o "$WSREP_CONNECTED" != "ON" ]; then + return $OCF_ERR_GENERIC + fi + + WSREP_LOCAL_STATE_COMMENT=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \ + -e "SHOW STATUS LIKE 'wsrep_local_state_comment'" | awk '{print $NF}') + rc=$? + + #Synced|Donor|Desync + if [ $rc -eq 0 ]; then + if [[ "$WSREP_LOCAL_STATE_COMMENT" =~ 'Synced'|'Donor'|'Desync' ]]; then + WSREP_READY=$($MYSQL $MYSQL_OPTIONS_TEST -s -N \ + -e "SHOW STATUS LIKE 'wsrep_ready'" | awk '{print $NF}') + rc=$? + + #Synced but wsrep not ready + if [ $rc -ne 0 -o "$WSREP_READY" != "ON" ]; then + return $OCF_ERR_GENERIC + fi + elif [[ "$WSREP_LOCAL_STATE_COMMENT" == 'Initialized' ]]; then + ocf_log err 'MySQL lost quorum or uninitialized' + return $OCF_ERR_GENERIC + fi + fi + + ocf_log debug "MySQL monitor succeeded"; + return $OCF_SUCCESS +} + +mysql_start() { + local NODES + + if mysql_status info 1; then + ocf_log info "MySQL already running" + return $OCF_SUCCESS + fi + + socket_dir="$( dirname ${OCF_RESKEY_socket} )" + if [ ! -d "${socket_dir}" ] ; then + ocf_log info "Create socket dir: ${socket_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}" + mkdir -p "${socket_dir}" + chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} "${socket_dir}" + chmod 755 "${socket_dir}" + fi + + # check and make PID file dir + pid_dir="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${pid_dir}" ] ; then + ocf_log info "Create PID dir: ${pid_dir} and chown to ${OCF_RESKEY_user}:${OCF_RESKEY_group}" + mkdir -p "${pid_dir}" + chown -R ${OCF_RESKEY_user}:${OCF_RESKEY_group} "${pid_dir}" + chmod 755 "${pid_dir}" + fi + + # Regardless of whether we just created the directory or it + # already existed, check whether it is writable by the configured + # user + for dir in $pid_dir $socket_dir; do + if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then + ocf_log err "Directory $dir is not writable by $OCF_RESKEY_user" + exit $OCF_ERR_PERM; + fi + done + + if [ -f /tmp/wsrep-init-file ]; then + mysql_extra_params="--init-file=/tmp/wsrep-init-file" + else + mysql_extra_params="" + fi + + update_node_gtid + check_if_reelection_needed + rc=$? + + if [ $rc -eq 1 ]; then + check_if_galera_pc + rc=$? + + if [ $rc -eq 1 ]; then + mysql_extra_params="$mysql_extra_params --wsrep-new-cluster" + fi + fi + + ocf_log info "Starting MySQL" + ${OCF_RESKEY_binary} \ + --pid-file=$OCF_RESKEY_pid \ + --socket=$OCF_RESKEY_socket \ + --datadir=$OCF_RESKEY_datadir \ + --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \ + $mysql_extra_params >/dev/null 2>&1 & + rc=$? + + if [ $rc -ne 0 ]; then + ocf_log err "MySQL start command failed: $rc" + return $rc + fi + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required. + while :; do + if mysql_status info 1; then + break + fi + sleep 3 + done + + ocf_log info "MySQL started" + return $OCF_SUCCESS +} + +mysql_cleanup() { + ocf_log debug "Delete lock file: /var/lock/subsys/mysqld" + rm -f /var/lock/subsys/mysqld + + ocf_log debug "Delete sock file: ${OCF_RESKEY_socket}" + rm -f $OCF_RESKEY_socket + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f $OCF_RESKEY_pid + +} + +mysql_stop() { + local rc + + mysql_status info 1 + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + mysql_cleanup + return $OCF_SUCCESS + fi + + local pid=$(cat $OCF_RESKEY_pid 2>/dev/null) + local pgrp=$(ps -o pgid= ${pid}) + ocf_log info "Sending SIGTERM to PID: ${pid}" + /bin/kill -TERM ${pid} > /dev/null + rc=$? 
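+    # SIGTERM is sent to the mysqld_safe pid; the whole process group gets SIGKILL later if it refuses to stop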
+ if [ $rc -ne $OCF_SUCCESS ]; then + ocf_log err "MySQL couldn't be stopped" + return $OCF_ERR_GENERIC + fi + # stop waiting + shutdown_timeout=20 + while [ $shutdown_timeout -gt 0 ]; do + mysql_status info 1 + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + /usr/bin/pkill -9 -g ${pgrp} > /dev/null + mysql_cleanup + return $OCF_SUCCESS + fi + sleep 2 + (( shutdown_timeout -= 2 )) + ocf_log info "MySQL still hasn't stopped yet. ${shutdown_timeout} seconds left " + done + + mysql_status info 1 + rc=$? + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log info "MySQL failed to stop using SIGTERM. Sending SIGKILL to PID: ${pid}" + /usr/bin/pkill -9 -g ${pgrp} > /dev/null + fi + + mysql_cleanup + return $OCF_SUCCESS +} + +########################################################################## +# If DEBUG_LOG is set, make this resource agent easy to debug: set up the +# debug log and direct all output to it. Otherwise, redirect to /dev/null. +# The log directory must be a directory owned by root, with permissions 0700, +# and the log must be writable and not a symlink. +########################################################################## +DEBUG_LOG="/tmp/mysql.ocf.ra.debug/log" +if [ "${DEBUG_LOG}" -a -w "${DEBUG_LOG}" -a ! -L "${DEBUG_LOG}" ]; then + DEBUG_LOG_DIR="${DEBUG_LOG%/*}" + if [ -d "${DEBUG_LOG_DIR}" ]; then + exec 9>>"$DEBUG_LOG" + exec 1>&9 2>&9 + date '+%Y%m%d %H:%M:%S' >&9 + echo "$*" >&9 + env | grep OCF_ | sort >&9 + set -x + else + exec 9>/dev/null + fi +fi + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +mysql_validate +rc=$? +if [ $rc -ne 0 ]; then + case "$1" in + stop) exit $OCF_SUCCESS;; + monitor) exit $OCF_NOT_RUNNING;; + *) exit $rc;; + esac +fi + +# What kind of method was invoked? +case "$1" in + start) mysql_start;; + stop) mysql_stop;; + monitor) mysql_monitor;; + validate-all) exit $OCF_SUCCESS;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac +# vim: set ts=4 sw=4 tw=0 et : diff --git a/files/fuel-ha-utils/ocf/ns_IPaddr2 b/files/fuel-ha-utils/ocf/ns_IPaddr2 new file mode 100755 index 0000000000..753acb1b2d --- /dev/null +++ b/files/fuel-ha-utils/ocf/ns_IPaddr2 @@ -0,0 +1,700 @@ +#!/bin/bash + +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +# Defaults + +OCF_RESKEY_cidr_netmask_default="32" +OCF_RESKEY_ns_default="" +OCF_RESKEY_base_veth_default="" # should be defined +OCF_RESKEY_ns_veth_default="" # should be defined +OCF_RESKEY_gateway_default="" # can be "none", "link", IPaddr +OCF_RESKEY_gateway_metric_default=0 # can be "", or metric value +OCF_RESKEY_also_check_interfaces_default="" # can be "", or list of interfaces +OCF_RESKEY_enable_forwarding_default=true +OCF_RESKEY_other_networks_default="" +OCF_RESKEY_bridge_default="" # can be "", or bridge name + +: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} +: ${OCF_RESKEY_ns=${OCF_RESKEY_ns_default}} +: ${OCF_RESKEY_base_veth=${OCF_RESKEY_base_veth_default}} +: ${OCF_RESKEY_ns_veth=${OCF_RESKEY_ns_veth_default}} +: ${OCF_RESKEY_gateway=${OCF_RESKEY_gateway_default}} +: ${OCF_RESKEY_gateway_metric=${OCF_RESKEY_gateway_metric_default}} +: ${OCF_RESKEY_also_check_interfaces=${OCF_RESKEY_also_check_interfaces_default}} +: ${OCF_RESKEY_enable_forwarding=${OCF_RESKEY_enable_forwarding_default}} +: ${OCF_RESKEY_other_networks=${OCF_RESKEY_other_networks_default}} +: ${OCF_RESKEY_bridge=${OCF_RESKEY_bridge_default}} + +FAMILY='inet' +RUN_IN_NS="ip netns exec $OCF_RESKEY_ns " +SH="/bin/bash" +SENDARP=$HA_BIN/send_arp +SENDARPPIDDIR=$HA_RSCTMP +SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip" +####################################################################### + +####################################################################### + +meta_data() { + cat < + + +1.0 + + +This Linux-specific resource manages IP address inside network namespace. + + +This Linux-specific resource manages IP address inside network namespace. + + + + +The IPv4 address to be configured in dotted quad notation, for example +"192.168.1.1". + +IPv4 address + + + + + +The base network interface on which the IP address will be brought +online. +If left empty, the script will try and determine this from the +routing table. + +Do NOT specify an alias interface in the form eth0:1 or anything here; +rather, specify the base interface only. +If you want a label, see the iflabel parameter. + +Prerequisite: + +There must be at least one static IP address, which is not managed by +the cluster, assigned to the network interface. +If you can not assign any static IP address on the interface, +modify this kernel parameter: + +sysctl -w net.ipv4.conf.all.promote_secondaries=1 # (or per device) + +Network interface + + + + + +The netmask for the interface in CIDR format +(e.g., 24 and not 255.255.255.0) + +If unspecified, the script will also try to determine this from the +routing table. + +CIDR netmask + + + + + +You can specify an additional label for your IP address here. +This label is appended to your interface name. + +A label can be specified in nic parameter but it is deprecated. +If a label is specified in nic name, this parameter has no effect. + +Interface label + + + + + +Name of network namespace.\n +Should be present. + +Name of network namespace. + + + + + +Name of base system side veth pair tail.\n +Should be present. + +Name of base system side veth pair tail. + + + + + +Name of net.namespace side veth pair tail.\n +Should be present. + +Name of net.namespace side veth pair tail. + + + + + +Default route address.\n +Can be "", "link" or IP address. + +Default route address. + + + + + +Default route address.\n +Can be "", "link" or IP address. + +Default route address. + + + + + +Setup forwarding on base system. + +Setup forwarding. 
+ + + + + +Iptables rules that should be started along with IP.\n + +Iptables rules associated with IP start. + + + + + +Iptables rules that should be stopped along with IP.\n + +Iptables rules associated with IP stop. + + + + + +Iptables rules that should be started along with IP in the namespace.\n + +Iptables rules associated with IP start in ns. + + + + + +Iptables rules that should be stopped along with IP in the namespace.\n + +Iptables rules associated with IP stop in ns. + + + + + +Iptables comment to associate with rules.\n + +Iptables comment to associate with rules. + + + + + +Network interfaces list (e.g. NICs) that should be in UP state for the monitor action to return success.\n + +Network interfaces list (e.g. NICs) that should be in UP state for the monitor action to return success. + + + + + +Additional routes that should be added to this resource. Routes will be added via value ns_veth. + +List of additional networks to add routes for. + + + + + +Name of the bridge that has ns_veth connected to it. + +Name of the bridge. + + + + + + + + + + + + + + +END + + exit $OCF_SUCCESS +} + + +ip_validate() { + + if [[ X`uname -s` != "XLinux" ]] ; then + ocf_log err "ns_IPaddr2 only supports Linux." + exit $OCF_ERR_INSTALLED + fi + + if [[ -z $OCF_RESKEY_ip ]] ; then + ocf_log err "IP address not given" + exit $OCF_ERR_CONFIGURED + fi + + if [[ -z $OCF_RESKEY_ns ]] ; then + ocf_log err "Network namespace not given" + exit $OCF_ERR_CONFIGURED + fi + + if [[ -z $OCF_RESKEY_cidr_netmask ]] ; then + ocf_log err "CIDR Netmask not given" + exit $OCF_ERR_CONFIGURED + fi + + if [[ -z $OCF_RESKEY_nic ]] ; then + ocf_log err "Base NIC not given" + exit $OCF_ERR_CONFIGURED + fi + + if [[ -z $OCF_RESKEY_base_veth ]] ; then + ocf_log err "Base veth tail name not given" + exit $OCF_ERR_CONFIGURED + fi + + if [[ -z $OCF_RESKEY_ns_veth ]] ; then + ocf_log err "NS veth tail name not given" + exit $OCF_ERR_CONFIGURED + fi + + if ! ocf_is_decimal "$OCF_RESKEY_gateway_metric"; then + ocf_log err "gateway_metric must be a non-negative integer value" + exit $OCF_ERR_CONFIGURED + fi + + return $OCF_SUCCESS +} + + +# +# Find out which interfaces serve the given IP address and netmask. +# The arguments are an IP address and a netmask. +# Its output is a list of interface names divided by spaces (e.g., "eth0 eth1"). +# +find_interface() { + local ipaddr="$1" + local netmask="$2" + [[ -z $netmask ]] || ipaddr="$ipaddr/$netmask" + + # + # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces + local iface="`ip -o -f inet addr show \ + | grep "\ $ipaddr" \ + | cut -d ' ' -f2 \ + | grep -v '^ipsec[[0-9]][[0-9]]*$'`" + local rc=$? + echo "$iface" + return $rc +} + +find_interface_in_ns() { + local ns="$1" + local ipaddr="$2" + local netmask="$3" + [[ -z $netmask ]] || ipaddr="$ipaddr/$netmask" + + # + # List interfaces but exclude FreeS/WAN ipsecN virtual interfaces + local iface=`ip netns exec $ns ip -o -f inet addr show \ + | grep "\ $ipaddr" \ + | cut -d ' ' -f2 \ + | grep -v '^ipsec[[0-9]][[0-9]]*$'` + local rc=$? + echo "$iface" + return $rc +} + +setup_routes() { + if [[ "${OCF_RESKEY_other_networks}" != "false" ]] ; then + for network in ${OCF_RESKEY_other_networks} + do + ocf_log debug "Adding route inside the namespace to ${network} via dev ${OCF_RESKEY_ns_veth}" + ocf_run $RUN_IN_NS ip route add ${network} dev ${OCF_RESKEY_ns_veth} + done + fi +} + +setup_forwarding() { + local forwarding + local rc=$OCF_SUCCESS + ocf_is_true ${OCF_RESKEY_enable_forwarding} + if [[ $?
== 0 ]] ; then + ocf_run $RUN_IN_NS sysctl -w net.ipv4.ip_forward=1 + forwarding=$(cat /proc/sys/net/ipv4/ip_forward) + if [[ "${forwarding}" != "1" ]] ; then + ocf_run sysctl -w net.ipv4.ip_forward=1 + rc=$? + fi + fi + return $rc +} + +add_to_bridge() { + local bridge_mtu=`cat /sys/class/net/${OCF_RESKEY_bridge}/mtu` + [ -d /sys/class/net/${OCF_RESKEY_bridge}/brif ] + if [[ $? == 0 ]]; then + ifconfig $OCF_RESKEY_base_veth mtu $bridge_mtu + brctl addif $OCF_RESKEY_bridge $OCF_RESKEY_base_veth && ocf_run ifconfig $OCF_RESKEY_base_veth 0.0.0.0 || return $OCF_ERR_GENERIC + else + ovs-vsctl show | grep $OCF_RESKEY_ns_veth + if [[ $? != 0 ]] ; then + $RUN_IN_NS ifconfig $OCF_RESKEY_ns_veth mtu $bridge_mtu + ocf_run ovs-vsctl add-port $OCF_RESKEY_bridge $OCF_RESKEY_ns_veth -- set Interface $OCF_RESKEY_ns_veth type=internal + fi + $RUN_IN_NS ip a | grep $OCF_RESKEY_ns_veth + if [[ $? != 0 ]] ; then + ocf_run ip link set $OCF_RESKEY_ns_veth netns $OCF_RESKEY_ns + ocf_run $RUN_IN_NS ifconfig $OCF_RESKEY_ns_veth $OCF_RESKEY_ip/$OCF_RESKEY_cidr_netmask + fi + fi + return $OCF_SUCCESS +} + +remove_from_bridge() { + [ -d /sys/class/net/${OCF_RESKEY_bridge}/brif ] + if [[ $? == 0 ]]; then + brctl delif $OCF_RESKEY_bridge $OCF_RESKEY_base_veth + else + ip netns exec network ifconfig $OCF_RESKEY_ns_veth 0.0.0.0 + fi +} + +get_first_ip_mask_for_if() { + local iface="$1" + local ns="$2" + local RUN='' + [[ -z ns ]] && RUN=$RUN_IN_NS + local addr=`$RUN ip -o -f inet a show dev $iface \ + | sed -re '1!d; s|.*\s([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/[0-9]+).*|\1|'` + local rc=$? + [[ $rc != 0 ]] && addr='' + echo "$addr" + return $rc +} + +get_first_ip_for_if() { + local iface="$1" + local ns="$2" + + local addr=`get_first_ip_mask_for_if $iface $ns \ + | sed -re 's|([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)/.*|\1|'` + local rc=$? + [[ $rc != 0 ]] && addr='' + echo "$addr" + return $rc +} + + +####################################################################### + + +check_ns() { + local ns=`ip netns list | grep "$OCF_RESKEY_ns"` + [[ $ns != $OCF_RESKEY_ns ]] && return $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +get_ns() { + local rc + check_ns && return $OCF_SUCCESS + + ocf_run ip netns add $OCF_RESKEY_ns + rc=$? + ocf_run $RUN_IN_NS ip link set up dev lo + + return $rc +} + +get_veth_pair() { + local rc + local rc1 + local ipaddr + + # check tail of veth-pair in base system + ocf_run ip link show $OCF_RESKEY_base_veth 2>/dev/null + rc=$? + + # create pair (tail's can't be alone) and attach tail to the net.namespace + if [[ $rc != 0 ]] ; then + ovs-vsctl show | grep $OCF_RESKEY_ns_veth + rc1=$? 
+ if [[ $rc1 != 0 ]] ; then + ocf_run ip link add $OCF_RESKEY_base_veth type veth peer name $OCF_RESKEY_ns_veth + ocf_run ip link set dev $OCF_RESKEY_ns_veth netns $OCF_RESKEY_ns + ocf_run $RUN_IN_NS ip link set up dev $OCF_RESKEY_ns_veth + ocf_run ip link set up dev $OCF_RESKEY_base_veth + sleep 1 + fi + # duplicate first IP address from base iface to the veth + if [[ -n $OCF_RESKEY_bridge ]] ; then + ipaddr=`get_first_ip_mask_for_if $OCF_RESKEY_bridge` + else + ipaddr=`get_first_ip_mask_for_if $OCF_RESKEY_nic` + fi + [[ -z $ipaddr ]] && return 0 # dublicate nothing + + if [[ $rc1 != 0 ]] ; then + ocf_run ip addr add $ipaddr dev $OCF_RESKEY_base_veth + fi + + if [[ -z $OCF_RESKEY_bridge ]] ; then + echo 1 > /proc/sys/net/ipv4/conf/$OCF_RESKEY_nic/proxy_arp + echo 1 > /proc/sys/net/ipv4/conf/$OCF_RESKEY_base_veth/proxy_arp + else + add_to_bridge + fi + fi + return 0 +} + +check_interfaces_for_up_state() { + local interfaces=$(echo "$1" | tr " ,:;" "\n") + local rc=$OCF_SUCCESS + + for i in $interfaces ; do + rv=$(cat /sys/class/net/$i/carrier) # can return non-zero error-code for administrative-downed interface + if [[ $? != 0 || $rv != "1" ]] ; then + rc=$OCF_NOT_RUNNING + break + fi + done + + return $rc +} + +ip_prepare() { + local rc + ip_validate + [[ $? != 0 ]] && return $OCF_ERR_GENERIC + + # create or get existing network namespace + get_ns || return $OCF_ERR_GENERIC + + # create or get existing pair of veth interfaces + get_veth_pair || return $OCF_ERR_GENERIC + + # attach IP address inside network namespace + ocf_run $RUN_IN_NS ip addr replace "$OCF_RESKEY_ip/$OCF_RESKEY_cidr_netmask" dev $OCF_RESKEY_ns_veth + [[ $? != 0 ]] && return $OCF_ERR_GENERIC + + # modify route in base system + ovs-vsctl show | grep $OCF_RESKEY_ns_veth + if [[ $? != 0 ]] ; then + ocf_run ip route flush dev $OCF_RESKEY_base_veth + [[ $? != 0 ]] && return $OCF_ERR_GENERIC + fi + + if [[ -z $OCF_RESKEY_bridge ]] ; then + ocf_run ip route add $OCF_RESKEY_ip dev $OCF_RESKEY_base_veth + [[ $? 
!= 0 ]] && return $OCF_ERR_GENERIC + fi + + # setup default routing in namespace if gateway given + if [[ $OCF_RESKEY_gateway == 'link' ]] ; then + ocf_run $RUN_IN_NS ip route replace default dev $OCF_RESKEY_ns_veth metric $OCF_RESKEY_gateway_metric + elif [[ $OCF_RESKEY_gateway == 'none' ]] ; then + echo "Do nothing" + else + ocf_run $RUN_IN_NS ip route replace default via $OCF_RESKEY_gateway metric $OCF_RESKEY_gateway_metric + fi + ARGS="-i 200 -r 5 -p $SENDARPPIDFILE $OCF_RESKEY_nic $OCF_RESKEY_ip auto not_used not_used" + ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 + return $OCF_SUCCESS +} + +iptables_start() { + local rc + local iptables_rules + local ns_iptables_rules + local rule + rc=$OCF_SUCCESS + # setup iptables rules if given + if [[ $OCF_RESKEY_iptables_start_rules != "false" ]] ; then + IFS=';' read -a iptables_rules <<< "$OCF_RESKEY_iptables_start_rules" + for rule in "${iptables_rules[@]}" + do + ocf_run $rule -m comment --comment "$OCF_RESKEY_iptables_comment" + done + fi + + if [[ $OCF_RESKEY_ns_iptables_start_rules != "false" ]] ; then + IFS=';' read -a ns_iptables_rules <<< "$OCF_RESKEY_ns_iptables_start_rules" + for rule in "${ns_iptables_rules[@]}" + do + ocf_run ip netns exec $OCF_RESKEY_ns $rule + done + fi + + setup_routes + return $rc +} + +iptables_stop() { + local rc + local iptables_rules + local ns_iptables_rules + local rule + rc=$OCF_SUCCESS + # remove iptables rules if given + if [[ $OCF_RESKEY_iptables_stop_rules != "false" ]] ; then + IFS=';' read -a iptables_rules <<< "$OCF_RESKEY_iptables_stop_rules" + for rule in "${iptables_rules[@]}" + do + ocf_run $rule -m comment --comment "$OCF_RESKEY_iptables_comment" + done + fi + + if [[ $OCF_RESKEY_ns_iptables_stop_rules != "false" ]] ; then + IFS=';' read -a ns_iptables_rules <<< "$OCF_RESKEY_ns_iptables_stop_rules" + for rule in "${ns_iptables_rules[@]}" + do + ocf_run ip netns exec $OCF_RESKEY_ns $rule + done + fi + + return $rc +} + +ip_start() { + setup_forwarding + check_interfaces_for_up_state "$OCF_RESKEY_nic:$OCF_RESKEY_also_check_interfaces" || return $OCF_ERR_GENERIC + ip_prepare + + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + # cleanun ns + ip_stop + rc=$OCF_ERR_GENERIC + else + iptables_start + fi + return $rc +} + +ip_stop() { + local rc + ip_validate + if [[ -n $OCF_RESKEY_bridge ]] ; then + remove_from_bridge + fi + # destroy veth-pair in base system + ocf_run ip link show $OCF_RESKEY_base_veth 2>/dev/null + rc=$? + if [[ $rc == 0 ]] ; then + ocf_run ip link set down dev $OCF_RESKEY_base_veth && + sleep 2 && # prevent race + ocf_run ip link del dev $OCF_RESKEY_base_veth + rc=$? + else + rc=0 + fi + + if [ -f "$SENDARPPIDFILE" ] ; then + kill `cat "$SENDARPPIDFILE"` + if [ $? 
-ne 0 ]; then + ocf_log warn "Could not kill previously running send_arp for $OCF_RESKEY_ip" + else + ocf_log info "killed previously running send_arp for $OCF_RESKEY_ip" + rm -f "$SENDARPPIDFILE" + fi + fi + + + if [[ $rc == 0 ]] ; then + rc=$OCF_SUCCESS # it means stop was success + iptables_stop + else + rc=$OCF_ERR_GENERIC + fi + return $rc +} + +ip_monitor() { + local rc + ip_validate + check_ns || return $OCF_NOT_RUNNING + local iface=$(find_interface_in_ns $OCF_RESKEY_ns $OCF_RESKEY_ip $OCF_RESKEY_cidr_netmask) + + [[ -z $iface ]] && return $OCF_NOT_RUNNING + + #todo: finding IP from VIP subnet + if [[ $OCF_RESKEY_bridge == false ]] ; then + local ipaddr=$(get_first_ip_for_if $OCF_RESKEY_nic) + else + local ipaddr=$(get_first_ip_for_if $OCF_RESKEY_bridge) + fi + [[ -z $ipaddr ]] && return $OCF_NOT_RUNNING + + check_interfaces_for_up_state "$OCF_RESKEY_nic:$OCF_RESKEY_also_check_interfaces" || return $OCF_NOT_RUNNING + ocf_run $RUN_IN_NS ping -n -c3 -q $ipaddr 2>&1 >>/dev/null || return $OCF_NOT_RUNNING + setup_forwarding + return $OCF_SUCCESS +} + + +ip_usage() { + cat < + + +1.2 + + +Master/Slave OCF Resource Agent for conntrackd + + +This resource agent manages conntrackd + + + +Name of the conntrackd executable. +If conntrackd is installed and available in the default PATH, it is sufficient to configure the name of the binary +For example "my-conntrackd-binary-version-0.9.14" +If conntrackd is installed somewhere else, you may also give a full path +For example "/packages/conntrackd-0.9.14/sbin/conntrackd" + +Name of the conntrackd executable + + + + +Full path to the conntrackd.conf file. +For example "/packages/conntrackd-0.9.14/etc/conntrackd/conntrackd.conf" +Path to conntrackd.conf + + + + + + + + + + + + + + + + +END +} + +meta_expect() +{ + local what=$1 whatvar=OCF_RESKEY_CRM_meta_${1//-/_} op=$2 expect=$3 + local val=${!whatvar} + if [[ -n $val ]]; then + # [, not [[, or it won't work ;) + [ $val $op $expect ] && return + fi + ocf_exit_reason "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}." + exit $OCF_ERR_CONFIGURED +} + +conntrackd_is_master() { + # You can't query conntrackd whether it is master or slave. It can be both at the same time. + # This RA creates a statefile during promote and enforces master-max=1 and clone-node-max=1 + ha_pseudo_resource $statefile monitor +} + +conntrackd_set_master_score() { + ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1 +} + +add_to_bridge() { + brctl show $OCF_RESKEY_bridge | grep $OCF_RESKEY_host_interface + if [[ $rc != 0 ]] ; then + ocf_run brctl addif $OCF_RESKEY_bridge $OCF_RESKEY_host_interface + ocf_run ifconfig $OCF_RESKEY_host_interface 0.0.0.0 + fi +} + +get_veth_pair() { + ip netns list | grep "$OCF_RESKEY_ns" + + [[ $? != 0 ]] && ip netns add $OCF_RESKEY_ns && ip netns exec $OCF_RESKEY_ns ip link set up dev lo + + ocf_run ip netns exec $OCF_RESKEY_ns ip link show $OCF_RESKEY_ns_interface 2>/dev/null + rc=$? + + # create pair (tail's can't be alone) and attach tail to the net.namespace + if [[ $rc != 0 ]] ; then + local last_octet=$(ifconfig $OCF_RESKEY_bridge 2>/dev/null|awk '/inet addr:/ {print $2}'|sed 's/addr://' | awk -F. 
'{print $NF}') + + ocf_run ip link add $OCF_RESKEY_host_interface type veth peer name $OCF_RESKEY_ns_interface + ocf_run ip link set dev $OCF_RESKEY_ns_interface netns $OCF_RESKEY_ns + ocf_run ip netns exec $OCF_RESKEY_ns ip link set up dev $OCF_RESKEY_ns_interface + ocf_run ip link set up dev $OCF_RESKEY_host_interface + ocf_run ip netns exec $OCF_RESKEY_ns ifconfig $OCF_RESKEY_ns_interface 10.0.0.${last_octet}/24 + fi + + add_to_bridge + + return 0 +} + +conntrackd_monitor() { + + get_veth_pair + + rc=$OCF_NOT_RUNNING + # It does not write a PID file, so check the socket exists after + # extracting its path from the configuration file + local conntrack_socket=$(awk '/^[ \t]*UNIX[ \t]*{/,/^[ \t]*}/ { if ($1 == "Path") { print $2 } }' $OCF_RESKEY_config) + [ -S "$conntrack_socket" ] && rc=$OCF_SUCCESS + if [ "$rc" -eq "$OCF_SUCCESS" ]; then + # conntrackd is running + # now see if it acceppts queries + if ! ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -s > /dev/null 2>&1; then + rc=$OCF_ERR_GENERIC + ocf_exit_reason "conntrackd is running but not responding to queries" + fi + if conntrackd_is_master; then + rc=$OCF_RUNNING_MASTER + # Restore master setting on probes + if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then + conntrackd_set_master_score $master_score + fi + else + # Restore master setting on probes + if [ $OCF_RESKEY_CRM_meta_interval -eq 0 ]; then + conntrackd_set_master_score $slave_score + fi + fi + fi + return $rc +} + +conntrackd_start() { + rc=$OCF_ERR_GENERIC + + # Keep trying to start the resource; + # wait for the CRM to time us out if this fails + while :; do + conntrackd_monitor + status=$? + case "$status" in + $OCF_SUCCESS) + conntrackd_set_master_score $slave_score + # -n = request resync from the others + if ! ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -n; then + ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -n failed during start." + rc=$OCF_ERR_GENERIC + else + rc=$OCF_SUCCESS + fi + break + ;; + $OCF_NOT_RUNNING) + ocf_log info "Starting conntrackd" + ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -d + ;; + $OCF_RUNNING_MASTER) + ocf_log warn "conntrackd already in master mode, demoting." + ha_pseudo_resource $statefile stop + ;; + $OCF_ERR_GENERIC) + ocf_exit_reason "conntrackd start failed" + rc=$OCF_ERR_GENERIC + break + ;; + esac + done + return $rc +} + +conntrackd_stop() { + rc=$OCF_ERR_GENERIC + + # Keep trying to bring down the resource; + # wait for the CRM to time us out if this fails + while :; do + conntrackd_monitor + status=$? + case "$status" in + $OCF_SUCCESS|$OCF_ERR_GENERIC) + ocf_log info "Stopping conntrackd" + ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -k + ;; + $OCF_NOT_RUNNING) + rc=$OCF_SUCCESS + break + ;; + $OCF_RUNNING_MASTER) + ocf_log warn "conntrackd still master" + ;; + esac + done + return $rc + +} + +conntrackd_validate_all() { + check_binary "$OCF_RESKEY_binary" + if ! [ -e "$OCF_RESKEY_config" ]; then + ocf_exit_reason "Config FILE $OCF_RESKEY_config does not exist" + return $OCF_ERR_INSTALLED + fi + meta_expect master-node-max = 1 + meta_expect master-max = 1 + meta_expect clone-node-max = 1 + + return $OCF_SUCCESS +} + +conntrackd_promote() { + rc=$OCF_SUCCESS + if ! conntrackd_is_master; then + # -c = Commit the external cache to the kernel + # -f = Flush internal and external cache + # -R = resync with the kernel table + # -B = send a bulk update on the line + for parm in c f R B; do + if ! 
ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then + ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during promote." + rc=$OCF_ERR_GENERIC + break + fi + done + ha_pseudo_resource $statefile start + conntrackd_set_master_score $master_score + fi + return $rc +} + +conntrackd_demote() { + rc=$OCF_SUCCESS + if conntrackd_is_master; then + # -t = shorten kernel timers to remove zombies + # -n = request a resync from the others + for parm in t n; do + if ! ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm; then + ocf_exit_reason "$OCF_RESKEY_binary -C $OCF_RESKEY_config -$parm failed during demote." + rc=$OCF_ERR_GENERIC + break + fi + done + ha_pseudo_resource $statefile stop + conntrackd_set_master_score $slave_score + fi + return $rc +} + +conntrackd_notify() { + hostname=$(hostname) + # OCF_RESKEY_CRM_meta_notify_master_uname is a whitespace separated list of master hostnames + for master in $OCF_RESKEY_CRM_meta_notify_master_uname; do + # if we are the master and an instance was just started on another node: + # send a bulk update to allow failback + if [ "$hostname" = "$master" -a "$OCF_RESKEY_CRM_meta_notify_type" = "post" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "start" -a "$OCF_RESKEY_CRM_meta_notify_start_uname" != "$hostname" ]; then + ocf_log info "Sending bulk update in post start to peers to allow failback" + ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -B + fi + done + for tobepromoted in $OCF_RESKEY_CRM_meta_notify_promote_uname; do + # if there is a promote action to be executed on another node: + # send a bulk update to allow failback + if [ "$hostname" != "$tobepromoted" -a "$OCF_RESKEY_CRM_meta_notify_type" = "pre" -a "$OCF_RESKEY_CRM_meta_notify_operation" = "promote" ]; then + ocf_log info "Sending bulk update in pre promote to peers to allow failback" + ip netns exec $OCF_RESKEY_ns $OCF_RESKEY_binary -C $OCF_RESKEY_config -B + fi + done +} + +conntrackd_usage() { + cat <&2 +} + +meta_data() +{ +cat < + + +1.0 + +This script manages dnsmasq daemon with namespace support + +Manages an dnsmasq daemon inside a namespace + + + + + +Name of network namespace. +Should be present. + +Name of network namespace. + + + + + +The dnsmasq daemon configuration file name with full path. +For example, "/etc/dnsmasq/dnsmasq.cfg" + +Configuration file name with full path + + + + + +The dnsmasq pid file path. +For example, "/var/run/dnsmasq.pid" + +Full path to the dnsmasq pid file + + + + + +The dnsmasq binary path. +For example, "/usr/sbin/dnsmasq" + +Full path to the dnsmasq binary + + + + + +Extra command line arguments to pass to dnsmasq. +For example, "-f /etc/dnsmasq/shared.cfg" + +Extra command line arguments for dnsmasq + + + + + + + + + + + + + +END +exit $OCF_SUCCESS +} + +check_ns() { + local ns=`ip netns list | grep "$OCF_RESKEY_ns"` + [[ $ns != $OCF_RESKEY_ns ]] && return $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +get_ns() { + local rc + check_ns && return $OCF_SUCCESS + + ocf_run ip netns add $OCF_RESKEY_ns + rc=$? 
+ ocf_run $RUN_IN_NS ip link set up dev lo + + return $rc +} + +get_variables() { + get_ns + CONF_FILE="${OCF_RESKEY_conffile}" + COMMAND="$RUN ${OCF_RESKEY_binpath}" + PIDFILE="${OCF_RESKEY_pidfile}" +} + +dnsmasq_status() { + get_variables + if [ -n "${PIDFILE}" -a -f "${PIDFILE}" ]; then + # dnsmasq is probably running + # get pid from pidfile + PID="`cat ${PIDFILE}`" + if [ -n "${PID}" ]; then + # check if process exists + if $RUN ps -p "${PID}" | grep -q dnsmasq; then + ocf_log info "dnsmasq daemon running" + return $OCF_SUCCESS + else + ocf_log info "dnsmasq daemon is not running but pid file exists" + return $OCF_NOT_RUNNING + fi + else + ocf_log err "PID file empty!" + return $OCF_ERR_GENERIC + fi + fi + # dnsmasq is not running + ocf_log info "dnsmasq daemon is not running" + return $OCF_NOT_RUNNING +} + +dnsmasq_start() +{ + get_variables + # if dnsmasq is running return success + dnsmasq_status + retVal=$? + if [ $retVal -eq $OCF_SUCCESS ]; then + return $OCF_SUCCESS + elif [ $retVal -ne $OCF_NOT_RUNNING ]; then + ocf_log err "Error. Unknown status." + return $OCF_ERR_GENERIC + fi + + # run the dnsmasq binary + ocf_run ${COMMAND} ${OCF_RESKEY_extraconf} --conf-file=${CONF_FILE} --pid-file="${PIDFILE}" + if [ $? -ne 0 ]; then + ocf_log err "Error. dnsmasq daemon returned error $?." + return $OCF_ERR_GENERIC + fi + + ocf_log info "Started dnsmasq daemon." + return $OCF_SUCCESS +} + +dnsmasq_stop() +{ + get_variables + if dnsmasq_status ; then + PID="`${RUN} cat ${PIDFILE}`" + if [ -n "${PID}" ] ; then + kill "${PID}" + if [ $? -ne 0 ]; then + kill -SIGKILL "${PID}" + if [ $? -ne 0 ]; then + ocf_log err "Error. Could not stop dnsmasq daemon." + return $OCF_ERR_GENERIC + fi + fi + rm -f "${PIDFILE}" + fi + fi + ocf_log info "Stopped dnsmasq daemon." + return $OCF_SUCCESS +} + +dnsmasq_monitor() +{ + dnsmasq_status +} + +dnsmasq_validate_all() +{ + get_variables + if [ -n "$OCF_RESKEY_binpath" -a ! -x "$OCF_RESKEY_binpath" ]; then + ocf_log err "Binary path $OCF_RESKEY_binpath does not exist." + return $OCF_ERR_ARGS + fi + if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then + ocf_log err "Config file $OCF_RESKEY_conffile does not exist." + return $OCF_ERR_ARGS + fi + + if grep -v "^#" "$CONF_FILE" | grep "pidfile" > /dev/null ; then + : + else + ocf_log err "Error. \"pidfile\" entry required in the dnsmasq config file by dnsmasq OCF RA." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +dnsmasq_restart() +{ + dnsmasq_stop + dnsmasq_start +} + +# +# Main +# + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + start) dnsmasq_start + ;; + + stop) dnsmasq_stop + ;; + + restart) dnsmasq_restart + ;; + + status) dnsmasq_status + ;; + + monitor) dnsmasq_monitor + ;; + + validate-all) dnsmasq_validate_all + ;; + + meta-data) meta_data + ;; + + usage) usage; exit $OCF_SUCCESS + ;; + + *) usage; exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/files/fuel-ha-utils/ocf/ns_haproxy b/files/fuel-ha-utils/ocf/ns_haproxy new file mode 100755 index 0000000000..2f582ce046 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ns_haproxy @@ -0,0 +1,555 @@ +#!/bin/bash +# +# Resource script for haproxy daemon with namespace support +# +# Description: Manages haproxy daemon as an OCF resource in +# an High Availability setup inside a namespace +# +# HAProxy OCF script's Author: Mirantis +# License: GNU General Public License (GPL) +# +# usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data} +# +# The "start" arg starts haproxy. 
+# +# The "stop" arg stops it. +# +# OCF parameters: +# OCF_RESKEY_ns +# OCF_RESKEY_conffile +# OCF_RESKEY_pidfile +# OCF_RESKEY_binpath +# OCF_RESKEY_extraconf +# +# OCF_RESKEY_host_interface +# OCF_RESKEY_namespace_interface +# OCF_RESKEY_host_ip +# OCF_RESKEY_namespace_ip +# OCF_RESKEY_network_mask +# OCF_RESKEY_route_metric +# +# Note: This RA requires that the haproxy config files has a "pidfile" +# entry so that it is able to act on the correct process +########################################################################## +# Initialization: + +OCF_ROOT_default="/usr/lib/ocf" + +OCF_RESKEY_ns_default="haproxy" +OCF_RESKEY_conffile_default="/etc/haproxy/haproxy.cfg" +OCF_RESKEY_pidfile_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_binpath_default="/usr/sbin/haproxy" +OCF_RESKEY_extraconf_default="" + +OCF_RESKEY_other_networks_default="" +OCF_RESKEY_host_interface_default="hapr-host" +OCF_RESKEY_namespace_interface_default="hapr-ns" +OCF_RESKEY_host_ip_default="240.0.0.1" +OCF_RESKEY_namespace_ip_default="240.0.0.2" +OCF_RESKEY_network_mask_default="30" +OCF_RESKEY_route_metric_default="10000" +OCF_RESKEY_debug_default=false + +: ${OCF_ROOT=${OCF_ROOT_default}} + +: ${OCF_RESKEY_ns=${OCF_RESKEY_ns_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_binpath=${OCF_RESKEY_binpath_default}} +: ${OCF_RESKEY_extraconf=${OCF_RESKEY_extraconf_default}} + +: ${OCF_RESKEY_other_networks=${OCF_RESKEY_other_networks_default}} +: ${OCF_RESKEY_host_interface=${OCF_RESKEY_host_interface_default}} +: ${OCF_RESKEY_namespace_interface=${OCF_RESKEY_namespace_interface_default}} +: ${OCF_RESKEY_host_ip=${OCF_RESKEY_host_ip_default}} +: ${OCF_RESKEY_namespace_ip=${OCF_RESKEY_namespace_ip_default}} +: ${OCF_RESKEY_network_mask=${OCF_RESKEY_network_mask_default}} +: ${OCF_RESKEY_route_metric=${OCF_RESKEY_route_metric_default}} +: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}} + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} +. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs + +USAGE="Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}"; + +RUN_IN_NS="ip netns exec $OCF_RESKEY_ns " +if [[ -z $OCF_RESKEY_ns ]] ; then + RUN='' +else + RUN="$RUN_IN_NS " +fi + +########################################################################## + +usage() +{ + echo $USAGE >&2 +} + +meta_data() +{ +cat < + + +1.0 + +This script manages haproxy daemon with namespace support + +Manages an haproxy daemon inside an namespace + + + + + +Name of network namespace. +Should be present. + +Name of network namespace. + + + + + +The haproxy daemon configuration file name with full path. +For example, "/etc/haproxy/haproxy.cfg" + +Configuration file name with full path + + + + + +The haproxy pid file path. +For example, "/var/run/haproxy.pid" + +Full path to the haproxy pid file + + + + + +The haproxy binary path. +For example, "/usr/sbin/haproxy" + +Full path to the haproxy binary + + + + + +Extra command line arguments to pass to haproxy. +For example, "-f /etc/haproxy/shared.cfg" + +Extra command line arguments for haproxy + + + + + +Additional routes that should be added to this resource. Routes will be added via value namespace_interface. + +List of addtional routes to add routes for. 
+ + + + + +The host part of the interface pair used to connect the namespace to the network. +For example, "hapr-host" + +The name of the host-side interface used for the namespace + + + + + +The namespace part of the interface pair used to connect the namespace to the network. +For example, "hapr-ns" + +The name of the namespace-side interface used for the namespace + + + + + +The IP address used by the host interface. Must be from the same subnet as the namespace IP +and uses network_mask to determine the subnet. +Should not collide with any IP addresses already used in your network. +For example, "240.0.0.1" + +Host interface IP address + + + + + +The IP address used by the namespace interface. Must be from the same subnet as the host IP +and uses network_mask to determine the subnet. +Should not collide with any IP addresses already used in your network. +For example, "240.0.0.2" + +Namespace interface IP address + + + + + +The network mask length used to determine the subnet of the host +and the namespace interfaces. +For example, "30" + +Network mask length + + + + + +The metric value of the default route set for the pipe +link connecting namespace and host. It should be set to +a large number to be higher than other default route metrics +that could be set to override this default route. +If other routes are set within the namespace their metric should +be smaller than this number if you want them to be used instead of +this route. +For example, "1000" + +Namespace default route metric + + + + + +The debug flag for haproxy. + +HAProxy RA debug flag + + + + + + + + + + + + + + +END +exit $OCF_SUCCESS +} + +check_ns() { + local LH="${LL} check_ns():" + local ns=`ip netns list | grep "$OCF_RESKEY_ns"` + ocf_log debug "${LH} received netns list: ${ns}" + [[ $ns != $OCF_RESKEY_ns ]] && return $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +get_ns() { + local rc + local LH="${LL} get_ns():" + check_ns && return $OCF_SUCCESS + + ocf_run ip netns add $OCF_RESKEY_ns + rc=$? + ocf_run $RUN_IN_NS ip link set up dev lo + ocf_log debug "${LH} added netns ${OCF_RESKEY_ns} and set up lo" + + return $rc +} + +get_variables() { + local LH="${LL} get_variables():" + get_ns + CONF_FILE="${OCF_RESKEY_conffile}" + COMMAND="$RUN ${OCF_RESKEY_binpath}" + if [ -n "${OCF_RESKEY_pidfile}" ]; then + PIDFILE=$(grep -v "#" ${CONF_FILE} | grep "pidfile" | sed 's/^[ \t]*pidfile[ \t]*//') + else + PIDFILE="${OCF_RESKEY_pidfile}" + fi + ocf_log debug "${LH} set up variables and PIDFILE name" +} + +set_ns_routing() { + + nsip() { + ip netns exec "${OCF_RESKEY_ns}" ip ${@} + } + + # create host-ns veth pair unless it's present + ip link | grep -q -w "${OCF_RESKEY_host_interface}:" + if [ $? -gt 0 ]; then + ocf_log debug "Creating host interface: ${OCF_RESKEY_host_interface} and namespace interface: ${OCF_RESKEY_namespace_interface}" + ocf_run ip link add "${OCF_RESKEY_host_interface}" type veth peer name "${OCF_RESKEY_namespace_interface}" + fi + + # move the ns part to the namespace + ip link | grep -q -w "${OCF_RESKEY_namespace_interface}:" + if [ $? -eq 0 ]; then + ocf_log debug "Moving interface: ${OCF_RESKEY_namespace_interface} to namespace: ${OCF_RESKEY_ns}" + ocf_run ip link set dev "${OCF_RESKEY_namespace_interface}" netns "${OCF_RESKEY_ns}" + fi + + # up the host part + ocf_log debug "Bringing up host interface: ${OCF_RESKEY_host_interface}" + ocf_run ip link set "${OCF_RESKEY_host_interface}" up + + # set host part's ip + ip addr show dev "${OCF_RESKEY_host_interface}" | grep -q "inet ${OCF_RESKEY_host_ip}/${OCF_RESKEY_network_mask}" + if [ $?
-gt 0 ]; then + ocf_log debug "Setting host interface: ${OCF_RESKEY_host_interface} IP to: ${OCF_RESKEY_host_ip}/${OCF_RESKEY_network_mask}" + ocf_run ip addr add "${OCF_RESKEY_host_ip}/${OCF_RESKEY_network_mask}" dev "${OCF_RESKEY_host_interface}" + fi + + # up the ns part + ocf_log debug "Bringing up the namespace interface: ${OCF_RESKEY_namespace_interface}" + ocf_run nsip link set "${OCF_RESKEY_namespace_interface}" up + + # set ns part's ip + nsip addr show dev "${OCF_RESKEY_namespace_interface}" | grep -q "inet ${OCF_RESKEY_namespace_ip}/${OCF_RESKEY_network_mask}" + if [ $? -gt 0 ]; then + ocf_log debug "Setting namespace interface: ${OCF_RESKEY_namespace_interface} IP to: ${OCF_RESKEY_namespace_ip}/${OCF_RESKEY_network_mask}" + ocf_run nsip addr add "${OCF_RESKEY_namespace_ip}/${OCF_RESKEY_network_mask}" dev "${OCF_RESKEY_namespace_interface}" + fi + + # set default gateway inside ns + nsip route list | grep -q "default via ${OCF_RESKEY_host_ip}" + if [ $? -gt 0 ]; then + ocf_log debug "Creating default route inside the namespace to ${OCF_RESKEY_host_ip} with metric ${OCF_RESKEY_route_metric}" + ocf_run nsip route add default via "${OCF_RESKEY_host_ip}" metric "${OCF_RESKEY_route_metric}" + fi + + # set masquerade on host node + iptables -t nat -L | grep -q masquerade-for-haproxy-namespace + if [ $? -gt 0 ]; then + ocf_log debug "Creating NAT rule on the host system for traffic from IP: ${OCF_RESKEY_namespace_ip}" + ocf_run iptables -t nat -A POSTROUTING -s "${OCF_RESKEY_namespace_ip}" -j MASQUERADE -m comment --comment "masquerade-for-haproxy-namespace" + fi + + if [[ "${OCF_RESKEY_other_networks}" != "false" ]] ; then + for network in ${OCF_RESKEY_other_networks} + do + ocf_log debug "Adding route on the host system to ${network}: ${OCF_RESKEY_namespace_ip}" + ocf_run $RUN_IN_NS ip route replace ${network} via ${OCF_RESKEY_host_ip} metric 10000 + done + fi +} + +haproxy_status() { + get_variables + + # check and make PID file dir + local PID_DIR="$( dirname ${PIDFILE} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR}" + mkdir -p "${PID_DIR}" + # no need to chown, root is user for haproxy + chmod 755 "${PID_DIR}" + fi + + if [ -n "${PIDFILE}" -a -f "${PIDFILE}" ]; then + # haproxy is probably running + # get pid from pidfile + PID="`cat ${PIDFILE}`" + if [ -n "${PID}" ]; then + # check if process exists + if $RUN ps -p "${PID}" | grep -q haproxy; then + ocf_log info "haproxy daemon running" + return $OCF_SUCCESS + else + ocf_log warn "haproxy daemon is not running but pid file exists" + return $OCF_NOT_RUNNING + fi + else + ocf_log err "PID file empty!" + return $OCF_ERR_GENERIC + fi + fi + # haproxy is not running + ocf_log info "haproxy daemon is not running" + return $OCF_NOT_RUNNING +} + +haproxy_start() +{ + get_variables + + # if haproxy is running return success + haproxy_status + retVal=$? + if [ $retVal -eq $OCF_SUCCESS ]; then + return $OCF_SUCCESS + elif [ $retVal -ne $OCF_NOT_RUNNING ]; then + ocf_log err "Error. Unknown status." + return $OCF_ERR_GENERIC + fi + + # run the haproxy binary + ocf_run ${COMMAND} ${OCF_RESKEY_extraconf} -f "${CONF_FILE}" -p "${PIDFILE}" + if [ $? -ne 0 ]; then + ocf_log err "Error. haproxy daemon returned error $?." + return $OCF_ERR_GENERIC + fi + + if [ "${OCF_RESKEY_ns}" != '' ]; then + set_ns_routing + fi + + ocf_log info "Started haproxy daemon." 
+ return $OCF_SUCCESS +} + +haproxy_reload() +{ + get_variables + if haproxy_status; then + # get pid from pidfile + PID="`cat ${PIDFILE}`" + # reload haproxy binary replacing the old process + ocf_run ${COMMAND} ${OCF_RESKEY_extraconf} -f "${CONF_FILE}" -p "${PIDFILE}" -sf "${PID}" + if [ $? -ne 0 ]; then + ocf_log err "Error. haproxy daemon returned error $?." + return $OCF_ERR_GENERIC + fi + else + ocf_log info "Haproxy daemon is not running. Starting it." + haproxy_start + fi +} + +# Try to kill (SIGTERM) specified pid 5 times by 2 sec interval +haproxy_kill() +{ + local rc + local PID="$1" + local count=5 + local LH="${LL} haproxy_kill():" + + while [ $count -gt 0 ]; do + if [ -d /proc/${PID}/ ] ; then + ocf_log debug "${LH} Stopping haproxy daemon with SIGTERM... " + ocf_run kill "${PID}" + rc=$? + if [ $rc -eq 0 -a ! -d /proc/${PID}/ ]; then + ocf_log debug "${LH} Stopped haproxy daemon with SIGTERM" + return 0 + fi + else + return 0 + fi + sleep 2 + count=$(( count-1 )) + done + ocf_log debug "${LH} Failed to stop haproxy daemon with SIGTERM" + return 1 +} + +haproxy_stop() +{ + local rc + local LH="${LL} haproxy_stop():" + get_variables + if haproxy_status ; then + PID="`${RUN} cat ${PIDFILE}`" + if [ -z "${PID}" -o "${PID}" -eq "1" -o ! -d /proc/${PID}/ ]; then + ocf_log err "${LH} Cannot stop haproxy (PID=${PID})" + return $OCF_ERR_GENERIC + fi + haproxy_kill "${PID}" + rc=$? + if [ $rc -ne 0 -a -d /proc/${PID}/ ]; then + ocf_run kill -SIGKILL "${PID}" + ocf_log err "${LH} Failed to stop haproxy daemon gracefully. Killed with SIGKILL" + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "${LH} Error. Could not stop haproxy daemon." + return $OCF_ERR_GENERIC + fi + fi + ocf_log debug "Delete pid file: ${PIDFILE} with content ${PID}" + rm -f "${PIDFILE}" + fi + ocf_log info "Stopped haproxy daemon." + return $OCF_SUCCESS +} + +haproxy_monitor() +{ + haproxy_status +} + +haproxy_validate_all() +{ + get_variables + if [ -n "$OCF_RESKEY_binpath" -a ! -x "$OCF_RESKEY_binpath" ]; then + ocf_log err "Binary path $OCF_RESKEY_binpath does not exist." + return $OCF_ERR_ARGS + fi + if [ -n "$OCF_RESKEY_conffile" -a ! -f "$OCF_RESKEY_conffile" ]; then + ocf_log err "Config file $OCF_RESKEY_conffile does not exist." + return $OCF_ERR_ARGS + fi + + if grep -v "^#" "$CONF_FILE" | grep "pidfile" > /dev/null ; then + : + else + ocf_log err "Error. \"pidfile\" entry required in the haproxy config file by haproxy OCF RA." 
+ return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +haproxy_restart() +{ + haproxy_stop + haproxy_start +} + +# +# Main +# + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi +umask 0022 +export LL="${OCF_RESOURCE_INSTANCE}:" + +case $1 in + start) haproxy_start + ;; + + stop) haproxy_stop + ;; + + reload) haproxy_reload + ;; + + restart) haproxy_restart + ;; + + status) haproxy_status + ;; + + monitor) haproxy_monitor + ;; + + validate-all) haproxy_validate_all + ;; + + meta-data) meta_data + ;; + + usage) usage; exit $OCF_SUCCESS + ;; + + *) usage; exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/files/fuel-ha-utils/ocf/ns_ntp b/files/fuel-ha-utils/ocf/ns_ntp new file mode 100644 index 0000000000..8dc0ce4f1e --- /dev/null +++ b/files/fuel-ha-utils/ocf/ns_ntp @@ -0,0 +1,440 @@ +#!/bin/bash +# +# Resource script for ntp daemon with namespace support +# +# Description: Manages ntp daemon as an OCF resource in +# an High Availability setup inside a namespace +# +# HAProxy OCF script's Author: Mirantis +# License: GNU General Public License (GPL) +# +# usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data} +# +# The "start" arg starts ntp. +# +# The "stop" arg stops it. +# +# OCF parameters: +# OCF_RESKEY_ns +# OCF_RESKEY_conffile +# OCF_RESKEY_pidfile +# OCF_RESKEY_binpath +# OCF_RESKEY_extraconf +# +# Note: This RA requires that the ntp config files has a "pidfile" +# entry so that it is able to act on the correct process +########################################################################## +# Initialization: + +OCF_ROOT_default="/usr/lib/ocf" + +OCF_RESKEY_ns_default="vrouter" +OCF_RESKEY_conffile_default="/etc/ntp.conf" +OCF_RESKEY_pidfile_default="/var/run/ntpd.pid" +OCF_RESKEY_binpath_default="/usr/sbin/ntpd" +OCF_RESKEY_extraconf_default="" + +OCF_RESKEY_host_interface_default="vrouter-host" +OCF_RESKEY_namespace_interface_default="vr-ns" +OCF_RESKEY_host_ip_default="240.0.0.5" +OCF_RESKEY_namespace_ip_default="240.0.0.6" +OCF_RESKEY_network_mask_default="30" +OCF_RESKEY_route_metric_default="10000" + +: ${OCF_ROOT=${OCF_ROOT_default}} + +: ${OCF_RESKEY_ns=${OCF_RESKEY_ns_default}} +: ${OCF_RESKEY_conffile=${OCF_RESKEY_conffile_default}} +: ${OCF_RESKEY_pidfile=${OCF_RESKEY_pidfile_default}} +: ${OCF_RESKEY_binpath=${OCF_RESKEY_binpath_default}} +: ${OCF_RESKEY_extraconf=${OCF_RESKEY_extraconf_default}} + +: ${OCF_RESKEY_host_interface=${OCF_RESKEY_host_interface_default}} +: ${OCF_RESKEY_namespace_interface=${OCF_RESKEY_namespace_interface_default}} +: ${OCF_RESKEY_host_ip=${OCF_RESKEY_host_ip_default}} +: ${OCF_RESKEY_namespace_ip=${OCF_RESKEY_namespace_ip_default}} +: ${OCF_RESKEY_network_mask=${OCF_RESKEY_network_mask_default}} +: ${OCF_RESKEY_route_metric=${OCF_RESKEY_route_metric_default}} + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat} +. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs + +USAGE="Usage: $0 {start|stop|restart|status|monitor|validate-all|meta-data}"; + +RUN_IN_NS="ip netns exec $OCF_RESKEY_ns " +if [[ -z $OCF_RESKEY_ns ]] ; then + RUN='' +else + RUN="$RUN_IN_NS " +fi + +########################################################################## + +usage() +{ + echo $USAGE >&2 +} + +meta_data() +{ +cat < + + +1.0 + +This script manages ntp daemon with namespace support + +Manages an ntp daemon inside a namespace + + + + + +Name of network namespace. +Should be present. + +Name of network namespace. + + + + + +The ntp daemon configuration file name with full path. 
+For example, "/etc/ntp/ntp.cfg" + +Configuration file name with full path + + + + + +The ntp pid file path. +For example, "/var/run/ntp.pid" + +Full path to the ntp pid file + + + + + +The ntp binary path. +For example, "/usr/sbin/ntp" + +Full path to the ntp binary + + + + + +Extra command line arguments to pass to ntp. +For example, "-f /etc/ntp/shared.cfg" + +Extra command line arguments for ntp + + + + + +The host part of the interface pair used to connect the namespace to the network +For example, "vrouter-host" + +The name of the host interface used for namespace + + + + + +The namespace part of the interface pair used to connect the namespace to the network +For example, "vrouter-ns" + +The name of the namespace interface used for namespace + + + + + +The IP address used by the host interface. Must be from the same subnet as namesapce IP +and uses network_mask to determine subnet. +Should not collide with any IP addresses already used in your network. +For example, "240.0.0.5" + +Host interface IP address + + + + + +The IP address used by the namespace interface. Must be from the same subnet as host IP +and uses network_mask to determine subnet. +Should not collide with any IP addresses already used in your network. +For example, "240.0.0.6" + +Namespace interface IP address + + + + + +The network mask length used to determine subnet of the host +and the namspace interfaces. +For example, "30" + +Network mask length + + + + + +The metric value of the default route set for the pipe +link connecting namespace and host. It should be set to +a large number to be higher then other default route metrics +that could be set to override this default route. +If other routes are set eithin the namespace thir metric should +be smaller then this number if you want them to be used istead of +this route. +For example, "1000" + +Namespace default route metric + + + + + + + + + + + + + +END +exit $OCF_SUCCESS +} + +check_ns() { + local ns=`ip netns list | grep "$OCF_RESKEY_ns"` + [[ $ns != $OCF_RESKEY_ns ]] && return $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +get_ns() { + local rc + check_ns && return $OCF_SUCCESS + + ocf_run ip netns add $OCF_RESKEY_ns + rc=$? + ocf_run $RUN_IN_NS ip link set up dev lo + + return $rc +} + +get_variables() { + get_ns + + CONF_FILE="${OCF_RESKEY_conffile}" + COMMAND="$RUN ${OCF_RESKEY_binpath}" + PIDFILE="${OCF_RESKEY_pidfile}" +} + +set_ns_routing() { + nsip() { + ip netns exec "${OCF_RESKEY_ns}" ip ${@} + } + + # create host-ns veth pair unless it's present + ip link | grep -q -w "${OCF_RESKEY_host_interface}:" + if [ $? -gt 0 ]; then + ocf_log debug "Creating host interface: ${OCF_RESKEY_host_interface} and namespace interface: ${OCF_RESKEY_namespace_interface}" + ocf_run ip link add "${OCF_RESKEY_host_interface}" type veth peer name "${OCF_RESKEY_namespace_interface}" + fi + + # move the ns part to the namespace + ip link | grep -q -w "${OCF_RESKEY_namespace_interface}:" + if [ $? -eq 0 ]; then + ocf_log debug "Moving interface: ${OCF_RESKEY_namespace_interface} to namespace: ${OCF_RESKEY_ns}" + ocf_run ip link set dev "${OCF_RESKEY_namespace_interface}" netns "${OCF_RESKEY_ns}" + fi + + # up the host part + ocf_log debug "Bringing up host interface: ${OCF_RESKEY_host_interface}" + ocf_run ip link set "${OCF_RESKEY_host_interface}" up + + # set host part's ip + ip addr show dev "${OCF_RESKEY_host_interface}" | grep -q "inet ${OCF_RESKEY_host_ip}/${OCF_RESKEY_network_mask}" + if [ $? 
-gt 0 ]; then + ocf_log debug "Setting host interface: ${OCF_RESKEY_host_interface} IP to: ${OCF_RESKEY_host_ip}/${OCF_RESKEY_network_mask}" + ocf_run ip addr add "${OCF_RESKEY_host_ip}/${OCF_RESKEY_network_mask}" dev "${OCF_RESKEY_host_interface}" + fi + + # up the ns part + ocf_log debug "Bringing up the namespace interface: ${OCF_RESKEY_namespace_interface}" + ocf_run nsip link set "${OCF_RESKEY_namespace_interface}" up + + # set ns part's ip + nsip addr show dev "${OCF_RESKEY_namespace_interface}" | grep -q "inet ${OCF_RESKEY_namespace_ip}/${OCF_RESKEY_network_mask}" + if [ $? -gt 0 ]; then + ocf_log debug "Setting namespace interface: ${OCF_RESKEY_namespace_interface} IP to: ${OCF_RESKEY_namespace_ip}/${OCF_RESKEY_network_mask}" + ocf_run nsip addr add "${OCF_RESKEY_namespace_ip}/${OCF_RESKEY_network_mask}" dev "${OCF_RESKEY_namespace_interface}" + fi + + # set default gateway inside ns + nsip route list | grep -q "default via ${OCF_RESKEY_host_ip}" + if [ $? -gt 0 ]; then + ocf_log debug "Creating default route inside the namespace to ${OCF_RESKEY_host_ip} with metric ${OCF_RESKEY_route_metric}" + ocf_run nsip route add default via "${OCF_RESKEY_host_ip}" metric "${OCF_RESKEY_route_metric}" + fi + + # set masquerade on host node + iptables -t nat -L | grep -q masquerade-for-vrouter-namespace + if [ $? -gt 0 ]; then + ocf_log debug "Creating NAT rule on the host system for traffic from IP: ${OCF_RESKEY_namespace_ip}" + ocf_run iptables -t nat -A POSTROUTING -s "${OCF_RESKEY_namespace_ip}" -j MASQUERADE -m comment --comment "masquerade-for-vrouter-namespace" + fi +} + +ntp_status() { + get_variables + if [ -n "${PIDFILE}" -a -f "${PIDFILE}" ]; then + # ntp is probably running + # get pid from pidfile + PID="`cat ${PIDFILE}`" + if [ -n "${PID}" ]; then + # check if process exists + if $RUN ps -p "${PID}" | grep -q ntp; then + ocf_log info "ntp daemon running" + return $OCF_SUCCESS + else + ocf_log info "ntp daemon is not running but pid file exists" + return $OCF_NOT_RUNNING + fi + else + ocf_log err "PID file empty!" + return $OCF_ERR_GENERIC + fi + fi + # ntp is not running + ocf_log info "ntp daemon is not running" + return $OCF_NOT_RUNNING +} + +ntp_start() +{ + get_variables + # if ntp is running return success + ntp_status + retVal=$? + if [ $retVal -eq $OCF_SUCCESS ]; then + return $OCF_SUCCESS + elif [ $retVal -ne $OCF_NOT_RUNNING ]; then + ocf_log err "Error. Unknown status." + return $OCF_ERR_GENERIC + fi + + if [ "${OCF_RESKEY_ns}" != '' ]; then + set_ns_routing + fi + + # run the ntp binary + ocf_run ${COMMAND} ${OCF_RESKEY_extraconf} -u ntp:ntp -p "${PIDFILE}" -g -c ${CONF_FILE} + if [ $? -ne 0 ]; then + ocf_log err "Error. ntp daemon returned error $?." + return $OCF_ERR_GENERIC + fi + + ocf_log info "Started ntp daemon." + return $OCF_SUCCESS +} + +ntp_stop() +{ + get_variables + if ntp_status ; then + PID="`${RUN} cat ${PIDFILE}`" + if [ -n "${PID}" ] ; then + kill "${PID}" + if [ $? -ne 0 ]; then + kill -SIGKILL "${PID}" + if [ $? -ne 0 ]; then + ocf_log err "Error. Could not stop ntp daemon." + return $OCF_ERR_GENERIC + fi + fi + rm -f "${PIDFILE}" + fi + fi + ocf_log info "Stopped ntp daemon." + return $OCF_SUCCESS +} + +ntp_monitor() +{ + ntp_status +} + +ntp_validate_all() +{ + get_variables + if [ -n "$OCF_RESKEY_binpath" -a ! -x "$OCF_RESKEY_binpath" ]; then + ocf_log err "Binary path $OCF_RESKEY_binpath does not exist." + return $OCF_ERR_ARGS + fi + if [ -n "$OCF_RESKEY_conffile" -a ! 
-f "$OCF_RESKEY_conffile" ]; then + ocf_log err "Config file $OCF_RESKEY_conffile does not exist." + return $OCF_ERR_ARGS + fi + + if grep -v "^#" "$CONF_FILE" | grep "pidfile" > /dev/null ; then + : + else + ocf_log err "Error. \"pidfile\" entry required in the ntp config file by ntp OCF RA." + return $OCF_ERR_GENERIC + fi + + return $OCF_SUCCESS +} + +ntp_restart() +{ + ntp_stop + ntp_start +} + +# +# Main +# + +if [ $# -ne 1 ]; then + usage + exit $OCF_ERR_ARGS +fi + +case $1 in + start) ntp_start + ;; + + stop) ntp_stop + ;; + + restart) ntp_restart + ;; + + status) ntp_status + ;; + + monitor) ntp_monitor + ;; + + validate-all) ntp_validate_all + ;; + + meta-data) meta_data + ;; + + usage) usage; exit $OCF_SUCCESS + ;; + + *) usage; exit $OCF_ERR_UNIMPLEMENTED + ;; +esac diff --git a/files/fuel-ha-utils/ocf/ocf-neutron-dhcp-agent b/files/fuel-ha-utils/ocf/ocf-neutron-dhcp-agent new file mode 100644 index 0000000000..386f248439 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ocf-neutron-dhcp-agent @@ -0,0 +1,672 @@ +#!/bin/bash +# +# +# OpenStack Neutron DHCP Service +# +# Description: Manages an OpenStack Neutron DHCP Service process as an HA resource +# +# Authors: Emilien Macchi +# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_plugin_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_amqp_server_port +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +PATH=/sbin:/usr/sbin:/bin:/usr/bin + +OCF_RESKEY_binary_default="neutron-dhcp-agent" +OCF_RESKEY_config_default="/etc/neutron/neutron.conf" +OCF_RESKEY_keystone_config_default="/etc/keystone/keystone.conf" +OCF_RESKEY_plugin_config_default="/etc/neutron/dhcp_agent.ini" +OCF_RESKEY_log_file_default="/var/log/neutron/dhcp-agent.log" +OCF_RESKEY_check_state_reports_default=false +OCF_RESKEY_state_reports_file_default="/var/lib/neutron/dhcp_agent_report.log" +OCF_RESKEY_state_reports_timeout_default=60 +OCF_RESKEY_user_default="neutron" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_amqp_server_port_default="5672" # or "none" for disable checking +OCF_RESKEY_os_auth_url_default="http://localhost:5000/v2.0" +OCF_RESKEY_username_default="neutron" +OCF_RESKEY_password_default="neutron_pass" +OCF_RESKEY_tenant_default="services" +OCF_RESKEY_multiple_agents_default='true' +OCF_RESKEY_rescheduling_tries_default=5 +OCF_RESKEY_rescheduling_interval_default=33 +OCF_RESKEY_debug_default='false' + + +: ${OCF_RESKEY_os_auth_url=${OCF_RESKEY_os_auth_url_default}} +: ${OCF_RESKEY_username=${OCF_RESKEY_username_default}} +: ${OCF_RESKEY_password=${OCF_RESKEY_password_default}} +: ${OCF_RESKEY_tenant=${OCF_RESKEY_tenant_default}} +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_keystone_config=${OCF_RESKEY_keystone_config_default}} +: ${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_multiple_agents=${OCF_RESKEY_multiple_agents_default}} +: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}} +: ${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}} +: ${OCF_RESKEY_check_state_reports=${OCF_RESKEY_check_state_reports_default}} +: ${OCF_RESKEY_state_reports_file=${OCF_RESKEY_state_reports_file_default}} +: ${OCF_RESKEY_state_reports_timeout=${OCF_RESKEY_state_reports_timeout_default}} +: ${OCF_RESKEY_rescheduling_tries=${OCF_RESKEY_rescheduling_tries_default}} +: ${OCF_RESKEY_rescheduling_interval=${OCF_RESKEY_rescheduling_interval_default}} +: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}} + + + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the OpenStack Neutron DHCP Service (${OCF_RESKEY_binary}) +May manage a neutron-dhcp-agent instance or a clone set that +creates a distributed neutron-dhcp-agent cluster. + +Manages the OpenStack DHCP Service (${OCF_RESKEY_binary}) + + + + +Location of the OpenStack DHCP Agent server binary (${OCF_RESKEY_binary}) + +OpenStack DHCP Agent server binary (${OCF_RESKEY_binary}) + + + + + +Location of the OpenStack Neutron Service (neutron-server) configuration file + +OpenStack DHCP Agent (neutron-server) config file + + + + + +Location of the Keystone configuration file + +OpenStack Keystone config file + + + + + +Location of the OpenStack DHCP Service (${OCF_RESKEY_binary}) configuration file + +OpenStack DHCP Agent (${OCF_RESKEY_binary}) config file + + + + + +User running OpenStack DHCP Service (${OCF_RESKEY_binary}) + +OpenStack DHCP Service (${OCF_RESKEY_binary}) user + + + + + +Flag, that switch RCS-agent behavior for multiple or single DHCP-agent. 
+ +Switsh between multiple or single DHCP-agent behavior + + + + + +The pid file to use for this OpenStack DHCP Service (${OCF_RESKEY_binary}) instance + +OpenStack DHCP Service (${OCF_RESKEY_binary}) pid file + + + + + +The listening port number of the AMQP server. +Mandatory to perform a monitor check. +Set to 'none' for disable this kind of checking. + +AMQP listening port + + + + + +The log file to use for this OpenStack DHCP Service (${OCF_RESKEY_binary}) instance + +OpenStack DHCP Service (${OCF_RESKEY_binary}) log file + + + + + +The flag, which enables or disables additional monitoring +based on agent's local state reports + +Enable or disable local state reports based monitoring + + + + + +This file contains DHCP agent local state report information. +There're three section in it: STARTUP, RPC_STATE_REPORT and SYNC_STATE. + +DHCP agent local state report file + + + + + +The timeout value for DHCP agent to update its local state report. +If it takes more time than the specified value agent is considered to be dead. + +DHCP agent local state reports timeout + + + + + +Neutron username for port list fetching + +Neutron username + + + + + +Neutron password for port list fetching + +Neutron password + + + + + +URL of keystone + +Keystone URL + + + + + +Admin tenant name + +Admin tenant + + + + + + Tries to start rescheduling script after start of agent. + + Tries to start rescheduling script after start of agent. + + + + + + Interval between starts of rescheduling script. + + Interval between starts of rescheduling script. + + + + + +Additional parameters to pass on to the OpenStack DHCP Service (${OCF_RESKEY_binary}) + +Additional parameters for neutron-dhcp-agent + + + + + + + + + + + + + + + + +END +} + +get_worker_pid() { + local options + local pid + # FIXME: Remove if condition and set 'falo' statically once Fuel + # discontinue support of Ubuntu 12.04 and CentOs 6.x where -a was not defined. + if pgrep -V | awk 'match($0, /[0-9]\.[0-9].*/) {if (substr($0, RSTART, RLENGTH) < 3.3) {exit 1}}'; then + options='falo' + else + options='flo' + fi + pid=`pgrep -u ${OCF_RESKEY_user} -${options} ${OCF_RESKEY_binary} | awk '/python \/usr\/bin/ {print $1}'` + echo $pid +} + +####################################################################### +# Functions invoked by resource manager actions + +neutron_dhcp_agent_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +setup_auth() { + # setup token-based authentication if it possible + AUTH_TOKEN="" + + if [[ -f $OCF_RESKEY_keystone_config ]] ; then + AUTH_TOKEN=$(grep -v '#' $OCF_RESKEY_keystone_config | grep -i 'admin_token\s*=\s*' | awk -F'=' '{print $2}') + fi + + AUTH_TAIL="" + if [[ -n "$AUTH_TOKEN" ]] ; then + AUTH_TAIL="--admin-auth-url=${OCF_RESKEY_os_auth_url} --auth-token=${AUTH_TOKEN}" + fi + + true +} + +neutron_dhcp_agent_status() { + local pid + local f_pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! 
-d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + pid=`get_worker_pid` + if [ "xxx$pid" == "xxx" ] ; then + ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running." + return $OCF_NOT_RUNNING + fi + #ocf_log debug "PID='$pid'" + + # Check the PID file and create it if needed + if [ ! -f $OCF_RESKEY_pid ] ; then + ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) is running, but no PID file was found." + ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # compare the PID from the file with the PID from `pgrep...` + f_pid=`cat $OCF_RESKEY_pid | tr '\n' ' ' | awk '{print $1}'` + if [ "xxx$pid" == "xxx$f_pid" ]; then + return $OCF_SUCCESS + fi + + # at this point we have a PID file, and the PID from it + # differs from the PID from `pgrep...` + if [ ! -d "/proc/$f_pid" ] || [ "xxx$f_pid" == "xxx" ] ; then + # process with PID from PID-file not found + ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running process with PID=$f_pid was found." + ocf_log warn "PID-file will be re-created (with PID=$pid)." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # at this point we have an alien PID file and a running process with this PID. + ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid" + return $OCF_ERR_GENERIC +} + +get_local_reports_value() { + + local SECTION + local KEY + local VALUE + + SECTION=$1 + KEY=$2 + VALUE=$(awk "/$SECTION/,/}/" $OCF_RESKEY_state_reports_file | grep $KEY | awk '{$1=""; print $0}' | tr -d "\",") + echo $VALUE +} + +check_local_reports() { + + local SECTIONS + local SYSTIME + local RPC_STATE_TIMESTAMP + + if [ !
-f $OCF_RESKEY_state_reports_file ] ; then + ocf_log warn "State reports file wasn't found" + return $OCF_SUCCESS + fi + + SECTIONS="STARTUP RPC_STATE_REPORT SYNC_STATE" + SYSTIME=$(date +%s) + RPC_STATE_TIMESTAMP=$(get_local_reports_value RPC_STATE_REPORT Timestamp) + RPC_STATE_TIMESTAMP=${RPC_STATE_TIMESTAMP%.*} + + if [[ $(($SYSTIME-$RPC_STATE_TIMESTAMP)) -gt $OCF_RESKEY_state_reports_timeout ]] ; then + ocf_log err "Agent is not reporting for too long" + return $OCF_ERR_GENERIC + fi + + for SECTION in $SECTIONS ; do + STATUS=$(get_local_reports_value $SECTION Status) + if [[ "$STATUS" =~ "failure" ]] ; then + SINCE=$(date --date="$(get_local_reports_value SYNC_STATE Since)" +%s) + if [[ $(($SYSTIME-$SINCE)) -gt $OCF_RESKEY_state_reports_timeout ]] ; then + ocf_log err "$SECTION report is in failure status for too long" + return $OCF_ERR_GENERIC + fi + fi + done + return $OCF_SUCCESS +} + +get_ns_list() { + local rv=`ip netns list | grep -Ee "^qdhcp-.*"` + echo $rv +} + +get_pid_list_for_ns_list() { + # Parameters contain namespace names for searching pids + local ns_list="$@" + local pids=`for netns in $ns_list ; do ip netns pids $netns ; done` + echo $pids +} + +clean_up() { + # kill processes inside network namespaces + ns_list=`get_ns_list` + + # kill all proceses from all dhcp-agent's net.namespaces, that using ip + count=3 # we will try kill process 3 times + while [ $count -gt 0 ]; do + # we can't use ps, because ps can't select processes for given network namespace + inside_ns_pids=`get_pid_list_for_ns_list "$ns_list"` + if [ -z "$inside_ns_pids" ] ; then + break + fi + ocf_run kill $inside_ns_pids + sleep 1 + ((count--)) + done + + # kill all remaining proceses, that not died by simple kill + inside_ns_pids=`get_pid_list_for_ns_list "$ns_list"` + if [ ! -z "$inside_ns_pids" ] ; then + for ns_pid in $inside_ns_pids ; do + ocf_run kill -9 $ns_pid + done + fi + + # cleanup network interfaces + q-agent-cleanup.py --agent=dhcp --cleanup-ports +} + +clean_up_namespaces() { + # kill unnided network namespaces. + # + # Be carefully. In each network namespace shouldn't be any processes + # using network!!! use clean_up before it + ns_list=`get_ns_list` + if [ ! -z "$ns_list" ] ; then + for ns_name in $ns_list ; do + ocf_run ip --force netns del $ns_name + done + fi +} + +neutron_dhcp_agent_monitor() { + local rc + local pid + local network_amqp_check + + neutron_dhcp_agent_status + rc=$? + + # If status returned anything but success, return that immediately + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + + if ocf_is_true "$OCF_RESKEY_check_state_reports" ; then + check_local_reports + rc=$? + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + fi + + ocf_log debug "OpenStack DHCP Agent (neutron-dhcp-agent) monitor succeeded" + return $OCF_SUCCESS +} + + +neutron_dhcp_agent_start() { + local rc + neutron_dhcp_agent_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) already running" + return $OCF_SUCCESS + fi + + clean_up + sleep 1 + clean_up_namespaces + rm -f $OCF_RESKEY_state_reports_file + + # run and detach to background agent as daemon. + # Don't use ocf_run as we're sending the tool's output to /dev/null + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + --config-file=$OCF_RESKEY_plugin_config --log-file=$OCF_RESKEY_log_file $OCF_RESKEY_additional_parameters \ + >> /dev/null"' 2>&1 & echo \$! 
> $OCF_RESKEY_pid' + ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + neutron_dhcp_agent_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + if ! ocf_is_true "$OCF_RESKEY_multiple_agents" ; then + # detach deffered rescheduling procedure + RESCHEDULING_CMD="q-agent-cleanup.py --agent=dhcp --reschedule --remove-dead ${AUTH_TAIL} 2>&1 >> /var/log/neutron/rescheduling.log" + RESCH_CMD='' + for ((i=0; i<$OCF_RESKEY_rescheduling_tries; i++)) ; do + RESCH_CMD="$RESCH_CMD sleep $OCF_RESKEY_rescheduling_interval ; $RESCHEDULING_CMD ;" + done + bash -c "$RESCH_CMD" & + fi + + ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) started" + return $OCF_SUCCESS +} + + +neutron_dhcp_agent_stop() { + local rc + local pid + + neutron_dhcp_agent_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + clean_up + ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) already stopped" + sleep 1 + clean_up_namespaces + return $OCF_SUCCESS + fi + + #Try SIGTERM + pid=`get_worker_pid` + # stop waiting + shutdown_timeout=15 + iteration_time=3 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-6)) + fi + all_inside_ns_pids=`get_pid_list_for_ns_list $(get_ns_list)` + all_pids="$pid $all_inside_ns_pids" + count=0 + alive=1 + while [ $alive -gt 0 ] && [ $count -lt $shutdown_timeout ]; do + alive=0 + ocf_run kill -s TERM $all_pids + sleep $iteration_time + #Check if processes are alive after command kill + #if yes, send to them the term signal again + np="" + for pid in $all_pids ; do + ocf_run kill -s 0 $pid + if [ $? -eq 0 ]; then + np="$np $pid" + ((alive++)) + fi + done + if [ $alive -gt 0 ] ; then + all_pids=$np + fi + ((count+=$iteration_time)) + ocf_log debug "OpenStack DHCP Agent (${OCF_RESKEY_binary}) still hasn't stopped yet. Waiting ..." + done + #Send the kill signal to processes which are still alive + if [ $alive -gt 0 ] ; then + alive=0 + ocf_run kill -s KILL $all_pids + sleep 1 + for pid in $all_pids ; do + ocf_run kill -s 0 $pid + if [ $? -eq 0 ]; then + ((alive++)) + fi + done + if [ $alive -gt 0 ] ; then + ocf_log err "OpenStack DHCP Agent (${OCF_RESKEY_binary}) stop failed" + return $OCF_ERR_GENERIC + fi + fi + ocf_log info "OpenStack DHCP Agent (${OCF_RESKEY_binary}) stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f $OCF_RESKEY_pid + +# cleanup network interfaces + q-agent-cleanup.py --agent=dhcp --cleanup-ports + clean_up_namespaces + if ! ocf_is_true "$OCF_RESKEY_multiple_agents" ; then + q-agent-cleanup.py --agent=dhcp --remove-self ${AUTH_TAIL} 2>&1 >> /var/log/neutron/rescheduling.log & + fi + sleep 3 + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +neutron_dhcp_agent_validate || exit $? +setup_auth || exit $? +umask 0022 + +# What kind of method was invoked? 
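+# The actions dispatched below are normally driven by Pacemaker. A minimal
+# crmsh sketch (hypothetical resource name and provider; assumes the script
+# is installed under an OCF provider directory such as
+# /usr/lib/ocf/resource.d/fuel/ocf-neutron-dhcp-agent):
+#
+#   crm configure primitive p_neutron-dhcp-agent \
+#     ocf:fuel:ocf-neutron-dhcp-agent \
+#     op monitor interval="20" timeout="10" \
+#     op start timeout="60" op stop timeout="60"
+#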
+case "$1" in + start) neutron_dhcp_agent_start;; + stop) neutron_dhcp_agent_stop;; + status) neutron_dhcp_agent_status;; + monitor) neutron_dhcp_agent_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/files/fuel-ha-utils/ocf/ocf-neutron-l3-agent b/files/fuel-ha-utils/ocf/ocf-neutron-l3-agent new file mode 100644 index 0000000000..6f72c8f921 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ocf-neutron-l3-agent @@ -0,0 +1,684 @@ +#!/bin/bash +# +# +# OpenStack L3 Service (neutron-l3-agent) +# +# Description: Manages an OpenStack L3 Service (neutron-l3-agent) process as an HA resource +# +# Authors: Emilien Macchi +# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_plugin_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_neutron_server_port +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +PATH=/sbin:/usr/sbin:/bin:/usr/bin + +OCF_RESKEY_binary_default="neutron-l3-agent" +OCF_RESKEY_config_default="/etc/neutron/neutron.conf" +OCF_RESKEY_keystone_config_default="/etc/keystone/keystone.conf" +OCF_RESKEY_plugin_config_default="/etc/neutron/l3_agent.ini" +OCF_RESKEY_log_file_default="/var/log/neutron/l3-agent.log" +OCF_RESKEY_check_state_reports_default=false +OCF_RESKEY_state_reports_file_default="/var/lib/neutron/l3_agent_report.log" +OCF_RESKEY_state_reports_timeout_default=60 +OCF_RESKEY_user_default="neutron" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_os_auth_url_default="http://localhost:5000/v2.0" +OCF_RESKEY_username_default="neutron" +OCF_RESKEY_password_default="neutron_pass" +OCF_RESKEY_tenant_default="services" +OCF_RESKEY_external_bridge_default="br-ex" +OCF_RESKEY_multiple_agents_default=true +OCF_RESKEY_rescheduling_tries_default=5 +OCF_RESKEY_rescheduling_interval_default=33 +OCF_RESKEY_debug_default=false + +: ${OCF_RESKEY_os_auth_url=${OCF_RESKEY_os_auth_url_default}} +: ${OCF_RESKEY_username=${OCF_RESKEY_username_default}} +: ${OCF_RESKEY_password=${OCF_RESKEY_password_default}} +: ${OCF_RESKEY_tenant=${OCF_RESKEY_tenant_default}} +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_keystone_config=${OCF_RESKEY_keystone_config_default}} +: ${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_multiple_agents=${OCF_RESKEY_multiple_agents_default}} +: ${OCF_RESKEY_external_bridge=${OCF_RESKEY_external_bridge_default}} +: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}} +: ${OCF_RESKEY_rescheduling_tries=${OCF_RESKEY_rescheduling_tries_default}} +: ${OCF_RESKEY_rescheduling_interval=${OCF_RESKEY_rescheduling_interval_default}} +: ${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}} +: ${OCF_RESKEY_check_state_reports=${OCF_RESKEY_check_state_reports_default}} +: ${OCF_RESKEY_state_reports_file=${OCF_RESKEY_state_reports_file_default}} 
+: ${OCF_RESKEY_state_reports_timeout=${OCF_RESKEY_state_reports_timeout_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the OpenStack L3 agent (neutron-l3-agent) +May manage a neutron-l3-agent instance or a clone set that +creates a distributed neutron-l3-agent cluster. + +Manages the OpenStack L3 Service (neutron-l3-agent) + + + + +Location of the OpenStack L3 agent server binary (neutron-l3-agent) + +OpenStack L3 agent server binary (neutron-l3-agent) + + + + + +Location of the OpenStack L3 agent (neutron-server) configuration file + +OpenStack L3 agent (neutron-server) config file + + + + + +Location of the Keystone configuration file + +OpenStack Keystone config file + + + + + +Location of the OpenStack L3 Service (neutron-l3-agent) configuration file + +OpenStack L3 agent (neutron-l3-agent) config file + + + + + +User running OpenStack L3 Service (neutron-l3-agent) + +OpenStack L3 Service (neutron-l3-agent) user + + + + + +The pid file to use for this OpenStack L3 Service (neutron-l3-agent) instance + +OpenStack L3 Service (neutron-l3-agent) pid file + + + + + +Flag, that switch RCS-agent behavior for multiple or single L3-agent. + +Switsh between multiple or single L3-agent behavior + + + + + +The log file to use for this OpenStack L3 Service (neutron-l3-agent) instance + +OpenStack L3 Service (neutron-l3-agent) log file + + + + + +The flag, which enables or disables additional monitoring +based on agent's local state reports + +Enable or disable local state reports based monitoring + + + + + +This file contains L3 agent local state report information. +There're three section in it: STARTUP, RPC_STATE_REPORT and SYNC_STATE. + +L3 agent local state report file + + + + + +The timeout value for L3 agent to update its local state report. +If it takes more time than the specified value agent is considered to be dead. + +L3 agent local state reports timeout + + + + + +The listening port number of the AMQP server. Mandatory to perform a monitor check + +AMQP listening port + + + + + + +Neutron username for port list fetching + +Neutron username + + + + + + +Neutron password for port list fetching + +Neutron password + + + + + +URL of keystone + +Keystone URL + + + + + +Admin tenant name + +Admin tenant + + + + + +External bridge for l3-agent + +External bridge + + + + + + Enable debug logging + + Enable debug logging + + + + + + Tries to start rescheduling script after start of agent. + + Tries to start rescheduling script after start of agent. + + + + + + Interval between starts of rescheduling script. + + Interval between starts of rescheduling script. + + + + + + Enable logging to syslog + + Enable logging to syslog + + + + + +Additional parameters to pass on to the OpenStack L3 Service (neutron-l3-agent) + +Additional parameters for neutron-l3-agent + + + + + + + + + + + + + + + + +END +} + +get_worker_pid() { + local options + local pid + # FIXME: Remove if condition and set 'falo' statically once Fuel + # discontinue support of Ubuntu 12.04 and CentOs 6.x where -a was not defined. 
+ if pgrep -V | awk 'match($0, /[0-9]\.[0-9].*/) {if (substr($0, RSTART, RLENGTH) < 3.3) {exit 1}}'; then + options='falo' + else + options='flo' + fi + pid=`pgrep -u ${OCF_RESKEY_user} -${options} ${OCF_RESKEY_binary} | awk '/python \/usr\/bin/ {print $1}'` + echo $pid +} + +####################################################################### +# Functions invoked by resource manager actions + +neutron_l3_agent_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +setup_auth() { + # setup token-based authentication if it possible + AUTH_TOKEN="" + + if [[ -f $OCF_RESKEY_keystone_config ]] ; then + AUTH_TOKEN=$(grep -v '#' $OCF_RESKEY_keystone_config | grep -i 'admin_token\s*=\s*' | awk -F'=' '{print $2}') + fi + + AUTH_TAIL="" + if [[ -n "$AUTH_TOKEN" ]] ; then + AUTH_TAIL="--admin-auth-url=${OCF_RESKEY_os_auth_url} --auth-token=${AUTH_TOKEN}" + fi + + true +} + +neutron_l3_agent_status() { + local pid + local f_pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + pid=`get_worker_pid` + if [ "xxx$pid" == "xxx" ] ; then + ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running." + return $OCF_NOT_RUNNING + fi + #ocf_log debug "PID='$pid'" + + # Check PID file and create if need + if [ ! -f $OCF_RESKEY_pid ] ; then + ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found." + ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # compare PID from file with PID from `pgrep...` + f_pid=`cat $OCF_RESKEY_pid | tr '\n' ' ' | awk '{print $1}'` + if [ "xxx$pid" == "xxx$f_pid" ]; then + return $OCF_SUCCESS + fi + + # at this point we have PID file and PID from it + # differents with PID from `pgrep...` + if [ ! -d "/proc/$f_pid" ] || [ "xxx$f_pid" == "xxx" ] ; then + # process with PID from PID-file not found + ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$f_pid found." + ocf_log warn "PID-file will be re-created (with PID=$pid)." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # at this point we have alien PID-file and running prosess with this PID. + ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid" + return $OCF_ERR_GENERIC +} + +get_local_reports_value() { + + local SECTION + local KEY + local VALUE + + SECTION=$1 + KEY=$2 + # 'cat' is done here to avoid problems with file opening by awk due to permanent rewrites by the agent + VALUE=$(awk "/$SECTION/,/}/" $OCF_RESKEY_state_reports_file | grep $KEY | awk '{$1=""; print $0}' | tr -d "\",") + echo $VALUE +} + +check_local_reports() { + + local SECTIONS + local SYSTIME + local RPC_STATE_TIMESTAMP + + if [ ! 
-f $OCF_RESKEY_state_reports_file ] ; then + ocf_log warn "State reports file wasn't found" + return $OCF_SUCCESS + fi + + SECTIONS="STARTUP RPC_STATE_REPORT SYNC_STATE" + SYSTIME=$(date +%s) + RPC_STATE_TIMESTAMP=$(get_local_reports_value RPC_STATE_REPORT Timestamp) + RPC_STATE_TIMESTAMP=${RPC_STATE_TIMESTAMP%.*} + + if [[ $(($SYSTIME-$RPC_STATE_TIMESTAMP)) -gt $OCF_RESKEY_state_reports_timeout ]] ; then + ocf_log err "Agent is not reporting for too long" + return $OCF_ERR_GENERIC + fi + + for SECTION in $SECTIONS ; do + STATUS=$(get_local_reports_value $SECTION Status) + if [[ "$STATUS" =~ "failure" ]] ; then + SINCE=$(date --date="$(get_local_reports_value SYNC_STATE Since)" +%s) + if [[ $(($SYSTIME-$SINCE)) -gt $OCF_RESKEY_state_reports_timeout ]] ; then + ocf_log err "$SECTION report is in failure status for too long" + return $OCF_ERR_GENERIC + fi + fi + done + return $OCF_SUCCESS +} + +get_ns_list() { + local rv=`ip netns list | grep -Ee "^qrouter-.*"` + echo $rv +} + +get_pid_list_for_ns_list() { + # Parameters contain namespace names for searching pids + local ns_list="$@" + local pids=`for netns in $ns_list ; do ip netns pids $netns ; done` + echo $pids +} + +clean_up() { + # kill processes inside network namespaces + ns_list=`get_ns_list` + + # kill all proceses from all dhcp-agent's net.namespaces, that using ip + count=3 # we will try kill process 3 times + while [ $count -gt 0 ]; do + # we can't use ps, because ps can't select processes for given network namespace + inside_ns_pids=`get_pid_list_for_ns_list "$ns_list"` + if [ -z "$inside_ns_pids" ] ; then + break + fi + for ns_pid in $inside_ns_pids ; do + ocf_run kill $ns_pid + done + sleep 1 + count=$(($count - 1)) + done + + # kill all remaining proceses, that not died by simple kill + inside_ns_pids=`get_pid_list_for_ns_list "$ns_list"` + if [ ! -z "$inside_ns_pids" ] ; then + for ns_pid in $inside_ns_pids ; do + ocf_run kill -9 $ns_pid + done + fi + + # cleanup network interfaces + q-agent-cleanup.py --agent=l3 --cleanup-ports +} + +clean_up_namespaces() { + # kill unnided network namespaces. + # + # Be carefully. In each network namespace shouldn't be any processes + # using network!!! use clean_up before it + ns_list=`get_ns_list` + if [ ! -z "$ns_list" ] ; then + for ns_name in $ns_list ; do + ocf_run ip --force netns del $ns_name + done + fi +} + +neutron_l3_agent_monitor() { + neutron_l3_agent_status + rc=$? + if ocf_is_true "$OCF_RESKEY_check_state_reports" ; then + if [ $rc -eq 0 ]; then + check_local_reports + rc=$? + fi + fi + return $rc + +} + + +neutron_l3_agent_start() { + local rc + + neutron_l3_agent_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "OpenStack neutron-l3-agent already running" + return $OCF_SUCCESS + fi + + clean_up + sleep 1 + clean_up_namespaces + rm -f $OCF_RESKEY_state_reports_file + + # run and detach to background agent as daemon. + # Don't use ocf_run as we're sending the tool's output to /dev/null + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + --config-file=$OCF_RESKEY_plugin_config --log-file=$OCF_RESKEY_log_file $OCF_RESKEY_additional_parameters \ + >> /dev/null"' 2>&1 & echo \$! > $OCF_RESKEY_pid' + ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + neutron_l3_agent_monitor + rc=$? 
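+        # The monitor wrapper returns OCF_SUCCESS once the process is up (and,
+        # if check_state_reports is enabled, its state report file is fresh);
+        # any result other than OCF_NOT_RUNNING aborts the start below.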
+ [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ] ; then + ocf_log err "OpenStack neutron-l3-agent start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + if ! ocf_is_true "$OCF_RESKEY_multiple_agents" ; then + # detach deferred rescheduling procedure + RESCHEDULING_CMD="q-agent-cleanup.py --agent=l3 --reschedule --remove-dead ${AUTH_TAIL} 2>&1 >> /var/log/neutron/rescheduling.log" + RESCH_CMD='' + for ((i=0; i<$OCF_RESKEY_rescheduling_tries; i++)) ; do + RESCH_CMD="$RESCH_CMD sleep $OCF_RESKEY_rescheduling_interval ; $RESCHEDULING_CMD ;" + done + bash -c "$RESCH_CMD" & + fuel-fdb-cleaner --ssh-keyfile /root/.ssh/id_rsa_neutron -l /var/log/neutron/fdb-cleaner.log + fi + + ocf_log info "OpenStack L3 agent (neutron-l3-agent) started" + return $OCF_SUCCESS +} + +neutron_l3_agent_stop() { + local rc + local pid + + neutron_l3_agent_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + clean_up + sleep 1 + clean_up_namespaces + ocf_log info "OpenStack L3 agent ($OCF_RESKEY_binary) already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`get_worker_pid` + # stop waiting + shutdown_timeout=15 + iteration_time=3 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-6)) + fi + all_inside_ns_pids=`get_pid_list_for_ns_list $(get_ns_list)` + all_pids="$pid $all_inside_ns_pids" + count=0 + alive=1 + while [ $alive -gt 0 ] && [ $count -lt $shutdown_timeout ]; do + alive=0 + ocf_run kill -s TERM $all_pids + sleep $iteration_time + #Check if processes are alive after command kill + #if yes, send to them the term signal again + np="" + for pid in $all_pids ; do + ocf_run kill -s 0 $pid + if [ $? -eq 0 ]; then + np="$np $pid" + ((alive++)) + fi + done + if [ $alive -gt 0 ] ; then + all_pids=$np + fi + ((count+=$iteration_time)) + ocf_log debug "OpenStack L3 agent ($OCF_RESKEY_binary) still hasn't stopped yet. Waiting ..." + done + #Send the kill signal to processes which are still alive + if [ $alive -gt 0 ] ; then + alive=0 + ocf_run kill -s KILL $all_pids + sleep 1 + for pid in $all_pids ; do + ocf_run kill -s 0 $pid + if [ $? -eq 0 ]; then + ((alive++)) + fi + done + if [ $alive -gt 0 ] ; then + ocf_log err "OpenStack L3 agent (${OCF_RESKEY_binary}) stop failed" + return $OCF_ERR_GENERIC + fi + fi + ocf_log info "OpenStack L3 agent ($OCF_RESKEY_binary) stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f $OCF_RESKEY_pid + clean_up + sleep 1 + clean_up_namespaces + if ! ocf_is_true "$OCF_RESKEY_multiple_agents" ; then + q-agent-cleanup.py --agent=l3 --remove-self ${AUTH_TAIL} 2>&1 >> /var/log/neutron/rescheduling.log & + fi + sleep 3 + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +neutron_l3_agent_validate || exit $? +setup_auth || exit $? +umask 0022 + +# What kind of method was invoked? 
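+# The dispatch below is normally invoked by Pacemaker. A minimal crmsh sketch
+# (hypothetical names; assumes installation under an OCF provider directory
+# such as /usr/lib/ocf/resource.d/fuel/) for running the agent as a clone set,
+# which matches the multiple_agents=true default documented in the metadata:
+#
+#   crm configure primitive p_neutron-l3-agent ocf:fuel:ocf-neutron-l3-agent \
+#     op monitor interval="20" timeout="10"
+#   crm configure clone clone_p_neutron-l3-agent p_neutron-l3-agent
+#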
+case "$1" in + start) neutron_l3_agent_start;; + stop) neutron_l3_agent_stop;; + status) neutron_l3_agent_status;; + monitor) neutron_l3_agent_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac + diff --git a/files/fuel-ha-utils/ocf/ocf-neutron-metadata-agent b/files/fuel-ha-utils/ocf/ocf-neutron-metadata-agent new file mode 100644 index 0000000000..1192622242 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ocf-neutron-metadata-agent @@ -0,0 +1,366 @@ +#!/bin/bash +# +# +# OpenStack Neutron Metadata Agent service +# +# Description: Manages an OpenStack Neutron Metadata Agent process as an HA resource +# +# Authors: Emilien Macchi +# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# +# See usage() function below for more details ... +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +PATH=/sbin:/usr/sbin:/bin:/usr/bin + +OCF_RESKEY_binary_default="neutron-metadata-agent" +OCF_RESKEY_config_default="/etc/neutron/neutron.conf" +OCF_RESKEY_agent_config_default="/etc/neutron/metadata_agent.ini" +OCF_RESKEY_log_file_default="/var/log/neutron/metadata-agent.log" +OCF_RESKEY_user_default="neutron" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_debug_default="false" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_agent_config=${OCF_RESKEY_agent_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}} +: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the OpenStack Neutron Metadata Agent +May manage a neutron-metadata-agent instance or a clone set that +creates a distributed neutron-metadata-agent cluster. 
+ +Manages the OpenStack OVS Service (${OCF_RESKEY_binary}) + + + + +Location of the Neutron Metadata Agent binary + +Neutron Metadata Agent binary + + + + + +Location of the OpenStack Neutron Service (neutron-server) configuration file + +OpenStack OVS Server (neutron-server) config file + + + + + +Location of the OpenStack Neutron Metadata Agent configuration file + +OpenStack Neutron Metadata Agent config file + + + + + +User running Neutron Metadata Agent service (${OCF_RESKEY_binary}) + +OpenStack Neutron Metadata Agent service (${OCF_RESKEY_binary}) user + + + + + +The pid file to use for this Neutron Metadata Agent service (${OCF_RESKEY_binary}) instance + +OpenStack Neutron Metadata Agent service (${OCF_RESKEY_binary}) pid file + + + + + +The log file to use for this OpenStack Metadata Service (${OCF_RESKEY_binary}) instance + +OpenStack Metadata Service (${OCF_RESKEY_binary}) log file + + + + + +The debug flag for OpenStack Metadata Service (${OCF_RESKEY_binary}) instance + +OpenStack Metadata Service (${OCF_RESKEY_binary}) debug flag + + + + + + + + + + + + + + +END +} + +get_worker_pid() { + local options + local pid + # FIXME: Remove if condition and set 'falo' statically once Fuel + # discontinue support of Ubuntu 12.04 and CentOs 6.x where -a was not defined. + if pgrep -V | awk 'match($0, /[0-9]\.[0-9].*/) {if (substr($0, RSTART, RLENGTH) < 3.3) {exit 1}}'; then + options='falo' + else + options='flo' + fi + pid=`pgrep -u ${OCF_RESKEY_user} -${options} ${OCF_RESKEY_binary} | awk '/python \/usr\/bin/ {print $1}'` + echo $pid +} + +####################################################################### +# Functions invoked by resource manager actions + +neutron_metadata_agent_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + + +neutron_metadata_agent_status() { + local pid + local f_pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + pid=`get_worker_pid` + if [ "xxx$pid" == "xxx" ] ; then + ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running." + return $OCF_NOT_RUNNING + fi + + # Check PID file and create if need + if [ ! -f $OCF_RESKEY_pid ] ; then + ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found." + ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # compare PID from file with PID from `pgrep...` + f_pid=`cat $OCF_RESKEY_pid | tr '\n' ' ' | awk '{print $1}'` + if [ "xxx$pid" == "xxx$f_pid" ]; then + return $OCF_SUCCESS + fi + + # at this point we have PID file and PID from it + # defferents with PID from `pgrep...` + if [ ! 
-d "/proc/$f_pid" ] || [ "xxx$f_pid" == "xxx" ] ; then + # process with PID from PID-file not found + ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$f_pid found." + ocf_log warn "PID-file will be re-created (with PID=$pid)." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # at this point we have alien PID-file and running prosess with this PID. + ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid" + return $OCF_ERR_GENERIC +} + + +neutron_metadata_agent_monitor() { + neutron_metadata_agent_status + rc=$? + return $rc +} + + +neutron_metadata_agent_start() { + local rc + + neutron_metadata_agent_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) already running" + return $OCF_SUCCESS + fi + + # run and detach to background Neutron-metadata-agent as daemon. + # Don't use ocf_run as we're sending the tool's output to /dev/null + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + --config-file=$OCF_RESKEY_agent_config --log-file=$OCF_RESKEY_log_file $OCF_RESKEY_additional_parameters \ + >> /dev/null"' 2>&1 & echo \$! > $OCF_RESKEY_pid' + ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + neutron_metadata_agent_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ] ; then + ocf_log err "OpenStack (${OCF_RESKEY_binary}) start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + ocf_log info "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) started" + return $OCF_SUCCESS +} + + +neutron_metadata_agent_stop() { + local rc + local pid + + neutron_metadata_agent_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log info "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`get_worker_pid` + if [ "xxx$pid" == "xxx" ] ; then + ocf_log warn "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) not running." + #return $OCF_NOT_RUNNING + return $OCF_SUCCESS + fi + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) couldn't be stopped" + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + neutron_metadata_agent_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) still hasn't stopped yet. Waiting ..." + done + + neutron_metadata_agent_status + rc=$? + if [ $rc -ne $OCF_NOT_RUNNING ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) failed to stop after ${shutdown_timeout}s \ + using SIGTERM. Trying SIGKILL ..." 
+ ocf_run kill -s KILL $pid + fi + + ocf_log info "OpenStack Neutron Metadata Agent (${OCF_RESKEY_binary}) stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f $OCF_RESKEY_pid + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +neutron_metadata_agent_validate || exit $? +umask 0022 + +# What kind of method was invoked? +case "$1" in + start) neutron_metadata_agent_start;; + stop) neutron_metadata_agent_stop;; + status) neutron_metadata_agent_status;; + monitor) neutron_metadata_agent_monitor;; + validate) neutron_metadata_agent_validate;; + validate-all) neutron_metadata_agent_validate;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/files/fuel-ha-utils/ocf/ocf-neutron-ovs-agent b/files/fuel-ha-utils/ocf/ocf-neutron-ovs-agent new file mode 100644 index 0000000000..0f7c9edc42 --- /dev/null +++ b/files/fuel-ha-utils/ocf/ocf-neutron-ovs-agent @@ -0,0 +1,405 @@ +#!/bin/bash +# +# +# OpenStack OVS Service +# +# Description: Manages an OpenStack OVS Service process as an HA resource +# +# Authors: Emilien Macchi +# Mainly inspired by the Nova Network resource agent written by Emilien Macchi & Sebastien Han +# +# Support: openstack@lists.launchpad.net +# License: Apache Software License (ASL) 2.0 +# +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_plugin_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_amqp_server_port +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +OCF_RESKEY_binary_default="neutron-openvswitch-agent" +OCF_RESKEY_config_default="/etc/neutron/neutron.conf" +OCF_RESKEY_plugin_config_default="/etc/neutron/plugin.ini" +OCF_RESKEY_user_default="neutron" +OCF_RESKEY_pid_default="${HA_RSCTMP}/${__SCRIPT_NAME}/${__SCRIPT_NAME}.pid" +OCF_RESKEY_amqp_server_port_default="5672" +OCF_RESKEY_segmentation_bridge_default="br-tun" +OCF_RESKEY_log_file_default="/var/log/neutron/openvswitch-agent.log" +OCF_RESKEY_debug_default='false' + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_plugin_config=${OCF_RESKEY_plugin_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_amqp_server_port=${OCF_RESKEY_amqp_server_port_default}} +: ${OCF_RESKEY_segmentation_bridge=${OCF_RESKEY_segmentation_bridge_default}} +: ${OCF_RESKEY_log_file=${OCF_RESKEY_log_file_default}} +: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the OpenStack Neutron OVS Service (${OCF_RESKEY_binary}) +May manage a neutron-ovs-agent instance or a clone set that +creates a distributed neutron-ovs-agent cluster. 
+ +Manages the OpenStack OVS Service (${OCF_RESKEY_binary}) + + + + +Location of the OpenStack OVS agent binary (${OCF_RESKEY_binary}) + +OpenStack OVS agent binary (${OCF_RESKEY_binary}) + + + + + +Location of the OpenStack Neutron Service (neutron-server) configuration file + +OpenStack Neutron Server (neutron-server) config file + + + + + +Location of the OpenStack OVS agent (${OCF_RESKEY_binary}) configuration file + +OpenStack OVS agent (${OCF_RESKEY_binary}) config file + + + + + +User running OpenStack OVS Service (${OCF_RESKEY_binary}) + +OpenStack OVS Service (${OCF_RESKEY_binary}) user + + + + + +The pid file to use for this OpenStack OVS Service (${OCF_RESKEY_binary}) instance + +OpenStack OVS Service (${OCF_RESKEY_binary}) pid file + + + + + + +Additional parameters to pass on to the OpenStack OVS Service (${OCF_RESKEY_binary}) + +Additional parameters for neutron-ovs-agent + + + + + +OVS integration bridge name OpenStack OVS Service (${OCF_RESKEY_binary}) + +Segmentation bridge name for neutron-ovs-agent + + + + + +The log file to use for this OpenStack OVS Service (${OCF_RESKEY_binary}) instance + +OpenStack OVS Service (${OCF_RESKEY_binary}) log file + + + + + +The debug flag for OpenStack OVS Service (${OCF_RESKEY_binary}) instance + +OpenStack Metadata Service (${OCF_RESKEY_binary}) debug flag + + + + + + + + + + + + + + +END +} + +get_worker_pid() { + local options + local pid + # FIXME: Remove if condition and set 'falo' statically once Fuel + # discontinue support of Ubuntu 12.04 and CentOs 6.x where -a was not defined. + if pgrep -V | awk 'match($0, /[0-9]\.[0-9].*/) {if (substr($0, RSTART, RLENGTH) < 3.3) {exit 1}}'; then + options='falo' + else + options='flo' + fi + pid=`pgrep -u ${OCF_RESKEY_user} -${options} ${OCF_RESKEY_binary} | awk '/python \/usr\/bin/ {print $1}'` + echo $pid +} + +####################################################################### +# Functions invoked by resource manager actions + +neutron_ovs_agent_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary netstat + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log_warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + + +neutron_ovs_agent_status() { + local pid + local f_pid + local rc + + # check and make PID file dir + local PID_DIR="$( dirname ${OCF_RESKEY_pid} )" + if [ ! -d "${PID_DIR}" ] ; then + ocf_log debug "Create pid file dir: ${PID_DIR} and chown to ${OCF_RESKEY_user}" + mkdir -p "${PID_DIR}" + chown -R ${OCF_RESKEY_user} "${PID_DIR}" + chmod 755 "${PID_DIR}" + fi + + pid=`get_worker_pid` + if [ "xxx$pid" == "xxx" ] ; then + ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running." + return $OCF_NOT_RUNNING + fi + #ocf_log debug "PID='$pid'" + + # Check PID file and create if need + if [ ! -f $OCF_RESKEY_pid ] ; then + ocf_log warn "OpenStack Neutron agent (${OCF_RESKEY_binary}) was run, but no PID file found." + ocf_log warn "Writing PID='$pid' to '$OCF_RESKEY_pid' for '${OCF_RESKEY_binary}' worker..." 
+ echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # compare PID from file with PID from `pgrep...` + f_pid=`cat $OCF_RESKEY_pid | tr '\n' ' ' | awk '{print $1}'` + if [ "xxx$pid" == "xxx$f_pid" ]; then + return $OCF_SUCCESS + fi + + # at this point we have PID file and PID from it + # defferents with PID from `pgrep...` + if [ ! -d "/proc/$f_pid" ] || [ "xxx$f_pid" == "xxx" ] ; then + # process with PID from PID-file not found + ocf_log warn "Old PID file $OCF_RESKEY_pid found, but no running processes with PID=$f_pid found." + ocf_log warn "PID-file will be re-created (with PID=$pid)." + echo $pid > $OCF_RESKEY_pid + return $OCF_SUCCESS + fi + + # at this point we have alien PID-file and running prosess with this PID. + ocf_log warn "Another daemon (with PID=$f_pid) running with PID file '$OCF_RESKEY_pid'. My PID=$pid" + return $OCF_ERR_GENERIC +} + + +neutron_ovs_agent_monitor() { + neutron_ovs_agent_status + rc=$? + return $rc +} + + +cleanup_agents_ovs_bridges() { + # todo: remove ports, created by agent + rc=0 +} + + +neutron_ovs_agent_start() { + local rc + + neutron_ovs_agent_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "OpenStack OVS agent (${OCF_RESKEY_binary}) already running" + return $OCF_SUCCESS + fi + + cleanup_agents_ovs_bridges + + # run and detach to background Neutron-ovs-agent as daemon. + # Don't use ocf_run as we're sending the tool's output + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + --config-file=$OCF_RESKEY_plugin_config --log-file=$OCF_RESKEY_log_file $OCF_RESKEY_additional_parameters \ + >> /dev/null"' 2>&1 & echo \$! > $OCF_RESKEY_pid' + ocf_log debug "Create pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + neutron_ovs_agent_monitor + rc=$? + [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "OpenStack (${OCF_RESKEY_binary}) start failed" + exit $OCF_ERR_GENERIC + fi + sleep 3 + done + + ocf_log info "OpenStack OVS agent (${OCF_RESKEY_binary}) started" + return $OCF_SUCCESS +} + +neutron_ovs_agent_stop() { + local rc + local pid + + neutron_ovs_agent_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + cleanup_agents_ovs_bridges + ocf_log info "OpenStack OVS agent (${OCF_RESKEY_binary}) already stopped" + return $OCF_SUCCESS + fi + + # Try SIGTERM + + pid=`get_worker_pid` + if [ "xxx$pid" == "xxx" ] ; then + ocf_log warn "OpenStack Neutron agent '$OCF_RESKEY_binary' not running." + #return $OCF_NOT_RUNNING + return $OCF_SUCCESS + fi + + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "OpenStack OVS agent (${OCF_RESKEY_binary}) couldn't be stopped" + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + neutron_ovs_agent_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "OpenStack OVS agent (${OCF_RESKEY_binary}) still hasn't stopped yet. Waiting ..." + done + + neutron_ovs_agent_status + rc=$? + if [ $rc -ne $OCF_NOT_RUNNING ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "OpenStack OVS agent (${OCF_RESKEY_binary}) failed to stop after ${shutdown_timeout}s \ + using SIGTERM. Trying SIGKILL ..." 
+ ocf_run kill -s KILL $pid + fi + + cleanup_agents_ovs_bridges + + ocf_log info "OpenStack OVS agent (${OCF_RESKEY_binary}) stopped" + + ocf_log debug "Delete pid file: ${OCF_RESKEY_pid} with content $(cat ${OCF_RESKEY_pid})" + rm -f $OCF_RESKEY_pid + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +neutron_ovs_agent_validate || exit $? +umask 0022 + +# What kind of method was invoked? +case "$1" in + start) neutron_ovs_agent_start;; + stop) neutron_ovs_agent_stop;; + status) neutron_ovs_agent_status;; + monitor) neutron_ovs_agent_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac diff --git a/files/fuel-ha-utils/ocf/rabbitmq b/files/fuel-ha-utils/ocf/rabbitmq new file mode 100755 index 0000000000..c5c925f955 --- /dev/null +++ b/files/fuel-ha-utils/ocf/rabbitmq @@ -0,0 +1,1384 @@ +#!/bin/bash +# +# See usage() function below for more details ... +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +PATH=/sbin:/usr/sbin:/bin:/usr/bin + +OCF_RESKEY_binary_default="/usr/sbin/rabbitmq-server" +OCF_RESKEY_ctl_default="/usr/sbin/rabbitmqctl" +OCF_RESKEY_debug_default=false +OCF_RESKEY_username_default="rabbitmq" +OCF_RESKEY_groupname_default="rabbitmq" +OCF_RESKEY_pid_file_default=/var/run/rabbitmq/p_pid +OCF_RESKEY_log_dir_default=/var/log/rabbitmq +OCF_RESKEY_mnesia_base_default=/var/lib/rabbitmq/mnesia +OCF_RESKEY_node_port_default=5672 +OCF_RESKEY_erlang_cookie_default=false +OCF_RESKEY_erlang_cookie_file_default="/var/lib/rabbitmq/.erlang.cookie" + +: ${HA_LOGTAG="lrmd"} +: ${HA_LOGFACILITY="daemon"} +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_ctl=${OCF_RESKEY_ctl_default}} +: ${OCF_RESKEY_debug=${OCF_RESKEY_debug_default}} +: ${OCF_RESKEY_username=${OCF_RESKEY_username_default}} +: ${OCF_RESKEY_groupname=${OCF_RESKEY_groupname_default}} +: ${OCF_RESKEY_log_dir=${OCF_RESKEY_log_dir_default}} +: ${OCF_RESKEY_mnesia_base=${OCF_RESKEY_mnesia_base_default}} +: ${OCF_RESKEY_pid_file=${OCF_RESKEY_pid_file_default}} +: ${OCF_RESKEY_node_port=${OCF_RESKEY_node_port_default}} +: ${OCF_RESKEY_erlang_cookie=${OCF_RESKEY_erlang_cookie_default}} +: ${OCF_RESKEY_erlang_cookie_file=${OCF_RESKEY_erlang_cookie_file_default}} + +####################################################################### + +OCF_RESKEY_start_time_default=$((OCF_RESKEY_CRM_meta_timeout / 6000 + 2)) +: ${OCF_RESKEY_start_time=${OCF_RESKEY_start_time_default}} +OCF_RESKEY_command_timeout_default="" +: ${OCF_RESKEY_command_timeout=${OCF_RESKEY_command_timeout_default}} +TIMEOUT_ARG=$((OCF_RESKEY_CRM_meta_timeout / 6000 + 30)) +COMMAND_TIMEOUT="/usr/bin/timeout ${OCF_RESKEY_command_timeout} ${TIMEOUT_ARG}" + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for ${OCF_RESKEY_binary} + +Resource agent for ${OCF_RESKEY_binary} + + + + +RabbitMQ binary + +RabbitMQ binary + + + + + +rabbitctl binary + +rabbitctl binary binary + + + + + +RabbitMQ PID file + +RabbitMQ PID file + + + + + +RabbitMQ log directory + +RabbitMQ log directory + + + 
+ + +RabbitMQ user name + +RabbitMQ user name + + + + + +RabbitMQ group name + +RabbitMQ group name + + + + + +Timeout command arguments for issued commands termination (value is auto evaluated) + +Arguments for timeout wrapping command + + + + + +Timeout for start rabbitmq server + +Timeout for start rabbitmq server + + + + + +The debug flag for agent (${OCF_RESKEY_binary}) instance. +In the /tmp/ directory will be created rmq-* files for log +some operations and ENV values inside OCF-script. + +AMQP server (${OCF_RESKEY_binary}) debug flag + + + + + +Base directory for storing Mnesia files + +Base directory for storing Mnesia files + + + + + +${OCF_RESKEY_binary} should listen on this port + +${OCF_RESKEY_binary} should listen on this port + + + + + +Erlang cookie for clustering. If specified, will be updated at the mnesia reset + +Erlang cookie + + + + + +Erlang cookie file path where the cookie will be put, if requested + +Erlang cookie file + + + + + + + + + + + + + + + + + + +END +} + +####################################################################### +# Functions invoked by resource manager actions + +now() { + date -u +%s +} + +master_score() { + local score=$1 + if [[ -z $score ]] ; then + score=0 + fi + ocf_run crm_master -l reboot -v $score || return $OCF_ERR_GENERIC + return $OCF_SUCCESS +} + +# Return OCF_SUCCESS, if current host is in the list of given hosts. +# Otherwise, return 10 +my_host() { + local hostlist="$1" + local hostname=$(hostname -s) + local hn + local rc=10 + local LH="${LL} my_host():" + + ocf_log info "${LH} hostlist is: $hostlist" + for host in $hostlist ; do + hn=$(echo "$host" | awk -F. '{print $1}') + ocf_log debug "${LH} comparing '$hostname' with '$hn'" + if [[ "X${hostname}" == "X${hn}" ]] ; then + rc=$OCF_SUCCESS + break + fi + done + + return $rc +} + +srv_uptime() { + local stime + stime=$( crm_attribute -N `hostname` -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d' ) + + if [ -z "${stime}" -o x"${stime}" == x"(null)" ] ; then + echo 0 + else + echo $(( $(now) - ${stime} )) + fi + + return $OCF_SUCCESS +} + +rmq_setup_env() { + local H + local dir + H=`hostname -s` + export RABBITMQ_NODENAME="rabbit@${H}" + export RABBITMQ_NODE_PORT=$OCF_RESKEY_node_port + export RABBITMQ_PID_FILE=$OCF_RESKEY_pid_file + MNESIA_FILES="${OCF_RESKEY_mnesia_base}/rabbit@${H}" + RMQ_START_TIME="${MNESIA_FILES}/ocf_server_start_time.txt" + MASTER_FLAG_FILE="${MNESIA_FILES}/ocf_master_for_${OCF_RESOURCE_INSTANCE}" + # check and make PID file dir + local PID_DIR=$( dirname $OCF_RESKEY_pid_file ) + if [ ! -d ${PID_DIR} ] ; then + mkdir -p ${PID_DIR} + chown -R ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} ${PID_DIR} + chmod 755 ${PID_DIR} + fi + + # Regardless of whether we just created the directory or it + # already existed, check whether it is writable by the configured + # user + for dir in ${PID_DIR} "${OCF_RESKEY_mnesia_base}" "${OCF_RESKEY_log_dir}"; do + if test -e ${dir}; then + if ! su -s /bin/sh - $OCF_RESKEY_username -c "test -w ${dir}"; then + ocf_log warn "Directory ${dir} is not writable by ${OCF_RESKEY_username}, chowning." + chown -R ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} "${dir}" + fi + fi + done + + export LL="${OCF_RESOURCE_INSTANCE}:" + update_cookie +} + +rabbit_node_name() { + echo "rabbit@"$(echo "$1" | awk -F. '{print $1}') +} + +# Return a RabbitMQ node to its virgin state. +# For reset and force_reset to succeed the RabbitMQ application must have been stopped. 
+# If the app cannot be stopped, beam will be killed and mnesia files will be removed. +reset_mnesia() { + local LH="${LL} reset_mnesia():" + local make_amnesia=false + local rc=$OCF_ERR_GENERIC + + # check status of a beam process + get_status + rc=$? + if [[ $rc == 0 ]] ; then + # beam is running + # check status of rabbit app and stop it, if it is running + get_status rabbit + rc=$? + if [[ $rc == 0 ]] ; then + # rabbit app is running, have to stop it + ocf_log info "${LH} Stopping RMQ-app prior to reset the mnesia." + stop_rmq_server_app + rc=$? + if [[ $rc != 0 ]] ; then + ocf_log warn "${LH} RMQ-app can't be stopped." + make_amnesia=true + fi + fi + + if ! $make_amnesia ; then + # rabbit app is not running, reset mnesia + ocf_log info "${LH} Execute reset with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} reset + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log info "${LH} Execute force_reset with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} force_reset + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log warn "${LH} Mnesia couldn't cleaned, even by force-reset command." + make_amnesia=true + fi + fi + fi + else + # there is no beam running + make_amnesia=true + ocf_log warn "${LH} There is no Beam process running." + fi + + # remove mnesia files, if required + if $make_amnesia ; then + kill_rmq_and_remove_pid + ocf_run find ${MNESIA_FILES} -type f -delete + ocf_log warn "${LH} Beam have been killed. Mnesia files appear corrupted and have been removed." + fi + # always return OCF SUCCESS + return $OCF_SUCCESS +} + + +block_client_access() +{ + # do not add temporary RMQ blocking rule, if it is already exist + # otherwise, try to add a blocking rule with max of 5 retries + tries=5 + until $(iptables -nvL | grep -q 'temporary RMQ block') || [[ $tries -eq 0 ]]; do + ((tries--)) + iptables -I INPUT -p tcp -m tcp --dport ${OCF_RESKEY_node_port} -m state --state NEW,RELATED,ESTABLISHED \ + -m comment --comment 'temporary RMQ block' -j REJECT --reject-with tcp-reset + sleep 1 + done + if [ $tries -eq 0 ]; then + return $OCF_ERR_GENERIC + else + return $OCF_SUCCESS + fi +} + +unblock_client_access() +{ + # remove all temporary RMQ blocking rules, if there are more than one exist + for i in $(iptables -nvL --line-numbers | awk '/temporary RMQ block/ {print $1}'); do + iptables -D INPUT -p tcp -m tcp --dport ${OCF_RESKEY_node_port} -m state --state NEW,RELATED,ESTABLISHED \ + -m comment --comment 'temporary RMQ block' -j REJECT --reject-with tcp-reset + done +} + +get_nodes__base(){ + local infotype='' + local rc=$OCF_ERR_GENERIC + + if [ "$1" == 'nodes' ] + then + infotype='db_nodes' + elif [ "$1" == 'running' ] + then + infotype='running_db_nodes' + fi + local c_status=$(${OCF_RESKEY_ctl} eval "mnesia:system_info(${infotype})." 2>/dev/null) + rc=$? + if [[ $rc != 0 ]] ; then + echo '' + return $OCF_ERR_GENERIC + fi + # translate line like '{running_nodes,['rabbit@node-1','rabbit@node-2','rabbit@node-3']},' to node_list + echo $(echo "${c_status}" | grep "${cl}" | awk -F, '{ for (i=1;i<=NF;i++) { if ($i ~ /@/) { gsub(/[\[\]}{]/,"",$i); print $i; } }}' | tr -d "\'") + return $OCF_SUCCESS +} + +get_nodes() { + echo $(get_nodes__base nodes) + return $? +} + +get_running_nodes() { + echo $(get_nodes__base running) + return $? 
+} + +check_need_join_to() { + local join_to=$(rabbit_node_name $1) + local node + local running_nodes=$(get_running_nodes) + local rc=$OCF_ERR_GENERIC + + rc=0 + for node in $running_nodes ; do + if [[ ${join_to} == ${node} ]] ; then + rc=1 + break + fi + done + + return $rc +} + +# Update erlang cookie, if it has been specified +update_cookie() { + if [[ "${OCF_RESKEY_erlang_cookie}" != false ]] ; then + echo "${OCF_RESKEY_erlang_cookie}" > "${OCF_RESKEY_erlang_cookie_file}" && \ + chown ${OCF_RESKEY_username}:${OCF_RESKEY_groupname} "${OCF_RESKEY_erlang_cookie_file}" && \ + chmod 600 "${OCF_RESKEY_erlang_cookie_file}" + fi + return $OCF_SUCCESS +} + +kill_rmq_and_remove_pid() { + local pid + local LH="${LL} kill_rmq_and_remove_pid():" + + if [[ -f $OCF_RESKEY_pid_file ]] ; then + pid=$(cat $OCF_RESKEY_pid_file) + # todo: check content for digital + if [[ -d /proc/${pid}/ ]] ; then + ocf_run kill -9 $pid + ocf_log warn "${LH} RMQ-runtime (beam) PID=${pid} stopped by 'kill -9', sorry..." + fi + ocf_run rm -f $OCF_RESKEY_pid_file + fi +} + +trim_var(){ + string="$*" + echo ${string%% } +} + +action_validate() { + # todo(sv): validate some incoming parameters + OCF_RESKEY_CRM_meta_notify_post=$(trim_var $OCF_RESKEY_CRM_meta_notify_post) + OCF_RESKEY_CRM_meta_notify_pre=$(trim_var $OCF_RESKEY_CRM_meta_notify_pre) + OCF_RESKEY_CRM_meta_notify_start=$(trim_var $OCF_RESKEY_CRM_meta_notify_start) + OCF_RESKEY_CRM_meta_notify_stop=$(trim_var $OCF_RESKEY_CRM_meta_notify_stop) + OCF_RESKEY_CRM_meta_notify_start_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_start_resource) + OCF_RESKEY_CRM_meta_notify_stop_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_stop_resource) + OCF_RESKEY_CRM_meta_notify_active_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_active_resource) + OCF_RESKEY_CRM_meta_notify_inactive_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_inactive_resource) + OCF_RESKEY_CRM_meta_notify_start_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_start_uname) + OCF_RESKEY_CRM_meta_notify_stop_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_stop_uname) + OCF_RESKEY_CRM_meta_notify_active_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_active_uname) + OCF_RESKEY_CRM_meta_notify_master_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_master_resource) + OCF_RESKEY_CRM_meta_notify_master_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_master_uname) + OCF_RESKEY_CRM_meta_notify_demote_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_demote_resource) + OCF_RESKEY_CRM_meta_notify_demote_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_demote_uname) + OCF_RESKEY_CRM_meta_notify_slave_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_slave_resource) + OCF_RESKEY_CRM_meta_notify_slave_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_slave_uname) + OCF_RESKEY_CRM_meta_notify_promote_resource=$(trim_var $OCF_RESKEY_CRM_meta_notify_promote_resource) + OCF_RESKEY_CRM_meta_notify_promote_uname=$(trim_var $OCF_RESKEY_CRM_meta_notify_promote_uname) + return $OCF_SUCCESS +} + +join_to_cluster() { + local node="$1" + local rmq_node=$(rabbit_node_name $node) + local rc=$OCF_ERR_GENERIC + local LH="${LL} join_to_cluster():" + + ocf_log info "${LH} start." + ocf_log info "${LH} Joining to cluster by node '${rmq_node}'." + + get_status rabbit + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} rabbitmq app will be stopped." + stop_rmq_server_app + rc=$? + if [[ $rc != 0 ]] ; then + ocf_log err "${LH} Can't stop rabbitmq app by stop_app command. Stopping." 
+ action_stop + return $OCF_ERR_GENERIC + fi + fi + ocf_log info "${LH} Execute join_cluster with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} join_cluster $rmq_node + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log err "${LH} Can't join to cluster by node '${rmq_node}'. Stopping." + action_stop + return $OCF_ERR_GENERIC + fi + sleep 2 + try_to_start_rmq_app + rc=$? + if [[ $rc != 0 ]] ; then + ocf_log err "${LH} Can't start RMQ app after join to cluster. Stopping." + action_stop + return $OCF_ERR_GENERIC + else + ocf_log info "${LH} Rabbit app started successfully. Updating start time attribute with $(now)" + ocf_run crm_attribute -N `hostname` -l reboot --name 'rabbit-start-time' --update $(now) + ocf_log info "${LH} Joined to cluster succesfully." + fi + + ocf_log info "${LH} end." + return $rc +} + +unjoin_nodes_from_cluster() { + # node names of the nodes where the pcs resource is being stopped + local nodelist="$1" + local hostname + local nodename + local rc=$OCF_ERR_GENERIC + local rnode + # nodes in rabbit cluster db + local nodes_in_cluster + local LH="${LL} unjoin_nodes_from_cluster():" + + nodes_in_cluster=$(get_nodes) + rc=$? + if [[ $rc != 0 ]] ; then + # no nodes in node list, nothing to do + return $OCF_SUCCESS + fi + + # unjoin all cluster nodes which are being stopped (i.e. recieved post-stop notify), except *this* node + # before to unjoin the nodes, make sure they were disconnected from *this* node + for hostname in $nodelist ; do + nodename=$(rabbit_node_name $hostname) + if [[ "$nodename" == "$RABBITMQ_NODENAME" ]] ; then + continue + fi + for rnode in $nodes_in_cluster ; do + if [[ "$nodename" == "$rnode" ]] ; then + # disconnect node being unjoined from this node + ocf_run ${OCF_RESKEY_ctl} eval "disconnect_node(list_to_atom(\"${nodename}\"))." 2>&1 + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} node '${nodename}' disconnected succesfully." + else + ocf_log info "${LH} disconnecting node '${nodename}' failed." + fi + + # unjoin node + ocf_log info "${LH} Execute forget_cluster_node with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} forget_cluster_node ${nodename} 2>&1 + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} node '${nodename}' unjoined succesfully." + else + ocf_log info "${LH} unjoining node '${nodename}' failed." + fi + fi + done + done + return $OCF_SUCCESS +} + +# Stop RMQ server process. Returns OCS_SUCCESS +stop_server_process() { + local pid + local rc=$OCF_ERR_GENERIC + local LH="${LL} stop_server_process():" + + pid=$(cat ${OCF_RESKEY_pid_file}) + rc=$? + if [[ $rc != 0 ]] ; then + ocf_log err "${LH} RMQ-server process PIDFILE was not found!" + return $OCF_ERR_GENERIC + fi + + ocf_log info "${LH} Execute stop with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} stop ${OCF_RESKEY_pid_file} 2>&1 >> "${OCF_RESKEY_log_dir}/shutdown_log" + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} RMQ-server process (PID=${pid}) stopped succesfully." + else + # RMQ-server process can't stop succesfully + if [[ -d /proc/$stop_pid/ ]] ; then + ocf_log warn "${LH} RMQ-server process (PID=${pid}) stopped by 'kill -9', sorry..." + ocf_run kill -9 $stop_pid + fi + fi + + kill_rmq_and_remove_pid + return $OCF_SUCCESS +} + +# Stop RMQ-app. 
Return OCF_SUCCESS, if the app was stopped, +# otherwise return OCF_ERR_GENERIC +stop_rmq_server_app() { + local rc=$OCF_ERR_GENERIC + + # if the beam process isn't running, then rabbit app is stopped as well + get_status + rc=$? + if [[ $rc != 0 ]] ; then + return $OCF_SUCCESS + fi + + # stop the app + ocf_log info "${LH} Execute stop_app with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} stop_app 2>&1 >> "${OCF_RESKEY_log_dir}/shutdown_log" + + get_status rabbit + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log info "${LH} RMQ-server app stopped succesfully." + rc=$OCF_SUCCESS + else + ocf_log err "${LH} RMQ-server app cannot be stopped." + rc=$OCF_ERR_GENERIC + fi + + return $rc +} + +start_beam_process() { + local rc=$OCF_ERR_GENERIC + local ts_end + local pf_end + local pid + local LH="${LL} start_beam_process():" + + # remove old PID-file if it exists + if [[ -f $OCF_RESKEY_pid_file ]] ; then + ocf_log warn "${LH} found old PID-file '${OCF_RESKEY_pid_file}'." + pid=$(cat ${OCF_RESKEY_pid_file}) + if [[ -d /proc/${pid} && ! -z ${pid} ]] ; then + ocf_run cat /proc/${pid}/cmdline | grep -c 'bin/beam' 2>&1 > /dev/null + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log warn "${LH} found beam process with PID=${pid}, killing...'." + ocf_run kill -9 $pid + else + ocf_log err "${LH} found unknown process with PID=${pid} from '${OCF_RESKEY_pid_file}'." + return $OCF_ERR_GENERIC + fi + fi + ocf_run rm -rf $OCF_RESKEY_pid_file + fi + + [ -f /etc/default/rabbitmq-server ] && . /etc/default/rabbitmq-server + + # run beam process + RABBITMQ_NODE_ONLY=1 ${OCF_RESKEY_binary} >> "${OCF_RESKEY_log_dir}/startup_log" 2>/dev/null & + ts_end=$(( $(now) + ${OCF_RESKEY_start_time} )) + rc=$OCF_ERR_GENERIC + while [ $(now) -lt ${ts_end} ]; do + # waiting for normal start of beam + pid=0 + pf_end=$(( $(now) + 3 )) + while [ $(now) -lt ${pf_end} ]; do + # waiting for OCF_RESKEY_pid_file of beam process + if [[ -f $OCF_RESKEY_pid_file ]] ; then + pid=$(cat ${OCF_RESKEY_pid_file}) + break + fi + sleep 1 + done + if [[ $pid != 0 && -d /proc/${pid} ]] ; then + rc=$OCF_SUCCESS + break + fi + sleep 2 + done + if [[ $rc != $OCF_SUCCESS ]]; then + if [[ "${pid}" == "0" ]] ; then + ocf_log warn "${LH} PID-file '${OCF_RESKEY_pid_file}' not found" + fi + ocf_log err "${LH} RMQ-runtime (beam) didn't start succesfully (rc=${rc})." + fi + + return $rc +} + +check_plugins() { + # Check if it's safe to load plugins and if we need to do so. Logic is: + # if (EnabledPlugins > 0) and (ActivePlugins == 0) ; then it's safe to load + # If we have at least one active plugin, then it's not safe to re-load them + # because plugins:setup() would remove existing dependency plugins in plugins_expand_dir. + ${OCF_RESKEY_ctl} eval '{ok, EnabledFile} = application:get_env(rabbit, enabled_plugins_file), EnabledPlugins = rabbit_plugins:read_enabled(EnabledFile), ActivePlugins = rabbit_plugins:active(), if length(EnabledPlugins)>0 -> if length(ActivePlugins)==0 -> erlang:error("need_to_load_plugins"); true -> false end; true -> false end.' + return $? +} + +load_plugins() { + check_plugins + if [[ $? == 0 ]] ; then + return 0 + else + ${OCF_RESKEY_ctl} eval 'ToBeLoaded = rabbit_plugins:setup(), ok = app_utils:load_applications(ToBeLoaded), StartupApps = app_utils:app_dependency_order(ToBeLoaded,false), app_utils:start_applications(StartupApps).' + return $? 
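# check_plugins/load_plugins drive plugin state through "rabbitmqctl eval"; the same
# state can be inspected interactively, which helps when debugging a node that refuses
# to load plugins. A small sketch using read-only commands only:
: <<'EXAMPLE_PLUGINS'
rabbitmq-plugins list                                                   # enabled vs. running plugins overview
rabbitmqctl eval 'rabbit_plugins:active().'                             # plugins currently active in the runtime
rabbitmqctl eval 'application:get_env(rabbit, enabled_plugins_file).'   # where the enabled list lives
EXAMPLE_PLUGINS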
+ fi +} + +list_active_plugins() { + local LIST=`${OCF_RESKEY_ctl} eval 'rabbit_plugins:active().'` + echo "${LIST}" +} + +try_to_start_rmq_app() { + local startup_log="${1:-${OCF_RESKEY_log_dir}/startup_log}" + local rc=$OCF_ERR_GENERIC + local LH="${LL} try_to_start_rmq_app():" + + + if [[ -z $startup_log ]] ; then + startup_log="${OCF_RESKEY_log_dir}/startup_log" + fi + + ocf_log info "${LH} begin." + ocf_log info "${LH} Execute start_app with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} start_app >>$startup_log 2>&1 + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} start_app was successful." + ocf_log info "${LH} waiting for start to finish with timeout: ${TIMEOUT_ARG}" + ocf_run ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} wait ${OCF_RESKEY_pid_file} + rc=$OCF_SUCCESS + # Loading enabled modules + ocf_log info "${LH} start plugins." + load_plugins + local mrc=$? + if [[ $mrc == 0 ]] ; then + local MLIST=`list_active_plugins` + ocf_log info "${LH} Starting plugins: $MLIST" + else + ocf_log info "${LH} Starting plugins: failed." + fi + else + ocf_log info "${LH} start_app failed." + rc=$OCF_ERR_GENERIC + fi + return $rc +} + +start_rmq_server_app() { + local rc=$OCF_ERR_GENERIC + local startup_log="${OCF_RESKEY_log_dir}/startup_log" + local startup_output + local LH="${LL} start_rmq_server_app():" + + #We are performing initial start check. + #We are not ready to provide service. + #Clients should not have access. + + + ocf_log info "${LH} begin." + # Safe-unblock the rules, if there are any + unblock_client_access + # Apply the blocking rule + block_client_access + rc=$? + if [[ $rc == $OCF_SUCCESS ]]; then + ocf_log info "${LH} blocked access to RMQ port" + else + ocf_log err "${LH} cannot block access to RMQ port!" + return $OCF_ERR_GENERIC + fi + get_status + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log info "${LH} RMQ-runtime (beam) not started, starting..." + start_beam_process + rc=$? + if [[ $rc != $OCF_SUCCESS ]]; then + unblock_client_access + ocf_log info "${LH} unblocked access to RMQ port" + return $OCF_ERR_GENERIC + fi + fi + + ocf_log info "${LH} RMQ-server app not started, starting..." + try_to_start_rmq_app "$startup_log" + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + # rabbitmq-server started successfuly as master of cluster + master_score 1 # minimal positive master-score for this node. + stop_rmq_server_app + rc=$? + if [[ $rc != 0 ]] ; then + ocf_log err "${LH} RMQ-server app can't be stopped. Beam will be killed." + kill_rmq_and_remove_pid + unblock_client_access + ocf_log info "${LH} unblocked access to RMQ port" + return $OCF_ERR_GENERIC + fi + else + # error at start RMQ-server + ocf_log warn "${LH} RMQ-server app can't start without Mnesia cleaning." + for ((a=10; a > 0 ; a--)) ; do + rc=$OCF_ERR_GENERIC + reset_mnesia || break + try_to_start_rmq_app "$startup_log" + rc=$? + if [[ $rc == $OCF_SUCCESS ]]; then + stop_rmq_server_app + rc=$? + if [[ $rc == $OCF_SUCCESS ]]; then + ocf_log info "${LH} RMQ-server app Mnesia cleaned successfully." + rc=$OCF_SUCCESS + master_score 1 + break + else + ocf_log err "${LH} RMQ-server app can't be stopped during Mnesia cleaning. Beam will be killed." + kill_rmq_and_remove_pid + unblock_client_access + ocf_log info "${LH} unblocked access to RMQ port" + return $OCF_ERR_GENERIC + fi + fi + done + fi + if [[ $rc == $OCF_ERR_GENERIC ]] ; then + ocf_log err "${LH} RMQ-server can't be started while many tries. Beam will be killed." 
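# The retry loop above falls back to reset_mnesia (defined elsewhere in this agent)
# before giving up. By hand, the equivalent last-resort recovery is roughly the
# following; treat it as a sketch, since a reset wipes the local node state:
: <<'EXAMPLE_RESET'
rabbitmqctl stop_app
rabbitmqctl reset        # drop local Mnesia state; the node forgets queues and cluster membership
rabbitmqctl start_app
EXAMPLE_RESET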
+ kill_rmq_and_remove_pid + fi + ocf_log info "${LH} end." + unblock_client_access + ocf_log info "${LH} unblocked access to RMQ port" + return $rc +} + +# check status of rabbit beam process or a rabbit app, if rabbit arg specified +get_status() { + local what="$1" + local rc=$OCF_ERR_GENERIC + local body + + body=$( ${COMMAND_TIMEOUT} ${OCF_RESKEY_ctl} eval 'rabbit_misc:which_applications().' 2>&1 ) + rc=$? + + if [[ $rc != 0 ]] ; then + return $OCF_NOT_RUNNING + fi + + if [[ ! -z $what ]] ; then + rc=$OCF_NOT_RUNNING + echo "$body" | grep "\{${what}," 2>&1 > /dev/null && rc=$OCF_SUCCESS + fi + + return $rc +} + +action_status() { + local rc=$OCF_ERR_GENERIC + + get_status + rc=$? + return $rc +} + +# return 0, if given node has a master attribute in CIB, +# otherwise, return 1 +is_master() { + local result + result=`crm_attribute -N "${1}" -l reboot --name 'rabbit-master' --query 2>/dev/null |\ + awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'` + if [[ "${result}" != "true" ]] ; then + return 1 + fi + return 0 +} + + +get_monitor() { + local rc=$OCF_ERR_GENERIC + local scope + local LH="${LL} get_monitor():" + local status_master + + get_status + rc=$? + if [[ $rc == $OCF_NOT_RUNNING ]] ; then + ocf_log info "${LH} get_status() returns ${rc}." + master_score 0 + return $OCF_NOT_RUNNING + elif [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} get_status() returns ${rc}." + ocf_log info "${LH} also checking if we are master." + get_status rabbit + rabbit_running=$? + is_master `hostname` + status_master=$? + ocf_log info "${LH} master attribute is ${status_master}" + if [ $status_master -eq 0 -a $rabbit_running -eq $OCF_SUCCESS ] + then + rc=$OCF_RUNNING_MASTER + fi + fi + get_status rabbit + rabbit_running=$? + ocf_log info "${LH} checking if rabbit app is running" + + if [ $rabbit_running == $OCF_SUCCESS ] + then + ocf_log info "${LH} rabbit app is running. checking if we are the part of healthy cluster" + prev_rc=$rc + nodelist=`crm_node -p -l | grep -v lost | awk '{print $2}' | grep -v "^$" | sed -e '/(null)/d'` + for node in $nodelist + do + ocf_log info "${LH} rabbit app is running. looking for master on $node" + is_master $node + status_master=$? + ocf_log info "${LH} fetched master attribute for $node. attr value is ${status_master}" + if [ $status_master -eq 0 ] ; then + rc=$OCF_ERR_GENERIC + ocf_log info "${LH} rabbit app is running. master is $node" + if get_running_nodes | grep -q $(rabbit_node_name $node) + then + ocf_log info "${LH} rabbit app is running and is member of healthy cluster" + rc=$prev_rc + break + fi + fi + done + fi + + if [[ $rc == $OCF_ERR_GENERIC ]]; then + ocf_log info "${LH} get_status() returns generic error ${rc}" + ocf_log info "${LH} ensuring this slave does not get promoted." 
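# The monitor logic above is assembled from three independent probes that can be
# reproduced from a shell when diagnosing scoring decisions (node-1 is a placeholder
# hostname):
: <<'EXAMPLE_PROBES'
rabbitmqctl eval 'rabbit_misc:which_applications().'                  # is beam up, is the rabbit app listed?
crm_attribute -N node-1 -l reboot --name 'rabbit-master' --query      # does the CIB mark node-1 as master?
crm_node -p -l                                                        # cluster membership as Pacemaker sees it
EXAMPLE_PROBES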
+ master_score 0 + return $OCF_ERR_GENERIC + else + ocf_log info "${LH} preparing to update master score for node" + our_uptime=$(srv_uptime) + nodelist=$( crm_node -p -l | grep -v lost | awk '{print $2}' | grep -v "^$" | grep -v `hostname` | sed -e '/(null)/d' ) + max=1 + for node in $nodelist + do + node_start_time=`crm_attribute -N $node -l reboot --name 'rabbit-start-time' --query 2>/dev/null | awk '{print $3}' | awk -F "=" '{print $2}' | sed -e '/(null)/d'` + if [ -z "${node_start_time}" -o x"${node_start_time}" == x"(null)" ] ; then + node_uptime=0 + else + node_uptime=$(( $(now) - ${node_start_time} )) + fi + ocf_log info "${LH} comparing our uptime (${our_uptime}) with $node (${node_uptime})" + if [ ${our_uptime} -lt ${node_uptime} ] + then + max=1 + break + else + # When uptime is equal, accept the existing master - if any - as the oldest node + is_master $node + status_master=$? + if [ $status_master -eq 0 ] ; then + max=1 + ocf_log info "${LH} Found the oldest master node $node with uptime (${node_uptime})" + break + else + max=0 + fi + fi + done + + + if [ $max -eq 0 ] + then + ocf_log info "${LH} we are the oldest node" + master_score 1000 + fi + fi + ocf_log info "${LH} get_monitor function ready to return ${rc}" + return $rc +} + + +action_monitor() { + local rc=$OCF_ERR_GENERIC + local LH="${LL} monitor:" + ocf_log debug "${LH} action start." + if [[ "${OCF_RESKEY_debug}" == "true" ]] ; then + d=`date '+%Y%m%d %H:%M:%S'` + echo $d >> /tmp/rmq-monitor.log + env >> /tmp/rmq-monitor.log + echo "$d [monitor] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log + fi + get_monitor + rc=$? + ocf_log debug "${LH} role: ${OCF_RESKEY_CRM_meta_role}" + ocf_log debug "${LH} result: $rc" + ocf_log debug "${LH} action end." + return $rc +} + + +action_start() { + local rc=$OCF_ERR_GENERIC + local msg + local master_node + local LH="${LL} start:" + + if [[ ${OCF_RESKEY_debug} == "true" ]] ; then + d=`date '+%Y%m%d %H:%M:%S'` + echo $d >> /tmp/rmq-start.log + env >> /tmp/rmq-start.log + echo "$d [start] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log + fi + + ocf_log info "${LH} action begin." + + get_status + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log warn "${LH} RMQ-runtime (beam) already started." + return $OCF_SUCCESS + fi + + ocf_log info "${LH} RMQ going to start." + start_rmq_server_app + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + ocf_log info "${LH} RMQ prepared for start succesfully." + fi + + ocf_log info "${LH} action end." + return $rc +} + + +action_stop() { + local rc=$OCF_ERR_GENERIC + local LH="${LL} stop:" + + if [[ ${OCF_RESKEY_debug} == "true" ]] ; then + d=$(date '+%Y%m%d %H:%M:%S') + echo $d >> /tmp/rmq-stop.log + env >> /tmp/rmq-stop.log + echo "$d [stop] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log + fi + + ocf_log info "${LH} action begin." 
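# A clean stop should leave no beam process and no transient CIB attributes behind;
# when in doubt, the end state can be checked manually, for example:
: <<'EXAMPLE_STOP_CHECK'
rabbitmqctl eval 'rabbit_misc:which_applications().' ; echo "rc=$?"          # non-zero rc means the runtime is down
test -f "${OCF_RESKEY_pid_file}" && echo "stale pid file left behind"        # pid_file path comes from the resource config
crm_attribute -N $(hostname) -l reboot --name 'rabbit-start-time' --query    # should report no value after stop
EXAMPLE_STOP_CHECK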
+ + # remove master flag + # remove master score + crm_attribute -N `hostname` -l reboot --name 'rabbit-master' --delete + master_score 0 + + ocf_log info "${LH} RMQ-runtime (beam) going to down." + stop_server_process + + crm_attribute -N `hostname` -l reboot --name 'rabbit-start-time' --delete + # remove file with rmq-server start timestamp + + #todo: make this timeout corresponded to the stop timeout for resource + sleep 10 + + ocf_log info "${LH} action end." + get_status + rc=$? + if [[ $rc == $OCF_NOT_RUNNING ]] ; then + ocf_log info "${LH} RMQ-runtime (beam) not running." + return $OCF_SUCCESS + else + return $OCF_ERR_GENERIC + fi + +} + +####################################################################### +# Join the cluster and return OCF_SUCCESS, if joined. +# Return 10, if node is trying to join to itself or empty destination. +# Return OCF_ERR_GENERIC, if cannot join. +jjj_join () { + local join_to="$1" + local rc=$OCF_ERR_GENERIC + local LH="${LL} jjj_join:" + + my_host ${join_to} + rc=$? + ocf_log debug "${LH} node='${join_to}' rc='${rc}'" + + # Check whether we are joining to ourselves + # or master host is not given + if [[ $rc != 0 && $join_to != '' ]] ; then + ocf_log info "${LH} Joining to cluster by node '${join_to}'" + join_to_cluster "${join_to}" + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + rc=$OCF_ERR_GENERIC + fi + fi + return $rc +} + +action_notify() { + local rc=$OCF_ERR_GENERIC + local rc2=$OCF_ERR_GENERIC + local LH="${LL} notify:" + + if [[ ${OCF_RESKEY_debug} == "true" ]] ; then + d=`date '+%Y%m%d %H:%M:%S'` + echo $d >> /tmp/rmq-notify.log + env >> /tmp/rmq-notify.log + echo "$d [notify] ${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation} promote='${OCF_RESKEY_CRM_meta_notify_promote_uname}' demote='${OCF_RESKEY_CRM_meta_notify_demote_uname}' master='${OCF_RESKEY_CRM_meta_notify_master_uname}' slave='${OCF_RESKEY_CRM_meta_notify_slave_uname}' start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log + fi + + if [[ ${OCF_RESKEY_CRM_meta_notify_type} == 'pre' ]] ; then + # PRE- anything notify section + case "$OCF_RESKEY_CRM_meta_notify_operation" in + promote) + ocf_log info "${LH} pre-promote begin." + my_host "$OCF_RESKEY_CRM_meta_notify_promote_uname" + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + nodelist=`crm_node -l | awk '{print $2}' | grep -v "^$" | sed -e '/(null)/d'` + for i in $nodelist + do + crm_attribute -N $i -l reboot --name 'rabbit-master' --delete + done + ocf_log info "${LH} pre-promote end." + fi + ;; + *) + ;; + esac + fi + + if [[ ${OCF_RESKEY_CRM_meta_notify_type} == 'post' ]] ; then + # POST- anything notify section + case "$OCF_RESKEY_CRM_meta_notify_operation" in + promote) + ocf_log info "${LH} post-promote begin." + jjj_join ${OCF_RESKEY_CRM_meta_notify_promote_uname} + rc=$? + ocf_log info "${LH} post-promote end." + if [[ $rc == $OCF_ERR_GENERIC ]] ; then + ocf_log err "${LH} Failed to join the cluster on post-promote. Master resource is failed." + exit $OCF_FAILED_MASTER + fi + ;; + start) + ocf_log info "${LH} post-start begin." + # check did this event from this host + my_host "$OCF_RESKEY_CRM_meta_notify_start_uname" + rc=$? + if [[ $rc == $OCF_SUCCESS ]] ; then + jjj_join ${OCF_RESKEY_CRM_meta_notify_master_uname} + rc2=$? + ocf_log info "${LH} post-start end." 
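# Pacemaker hands cluster topology to notify calls only through environment
# variables; roughly what this clone receives on a post-start event (host names are
# placeholders, the variable names match the ones used throughout this agent):
: <<'EXAMPLE_NOTIFY_ENV'
OCF_RESKEY_CRM_meta_notify_type=post
OCF_RESKEY_CRM_meta_notify_operation=start
OCF_RESKEY_CRM_meta_notify_start_uname="node-3"
OCF_RESKEY_CRM_meta_notify_master_uname="node-1"
OCF_RESKEY_CRM_meta_notify_active_uname="node-1 node-2"
EXAMPLE_NOTIFY_ENV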
+ if [[ $rc2 == $OCF_ERR_GENERIC ]] ; then + ocf_log warn "${LH} Failed to join the cluster on post-start. Resource is failed." + return $OCF_NOT_RUNNING + fi + fi + ;; + stop) + # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation) + ocf_log info "${LH} post-stop begin." + # always returns OCF_SUCCESS + unjoin_nodes_from_cluster "${OCF_RESKEY_CRM_meta_notify_stop_uname}" + ocf_log info "${LH} post-stop end." + ;; + demote) + # if rabbitmq-server stops on any another node, we should remove it from cluster (as ordinary operation) + ocf_log info "${LH} post-demote begin." + my_host "${OCF_RESKEY_CRM_meta_notify_demote_uname}" + rc=$? + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log info "${LH} master was demoted. stopping RabbitMQ app." + stop_rmq_server_app + rc2=$? + crm_attribute -N `hostname` -l reboot --name 'rabbit-start-time' --delete + fi + ocf_log info "${LH} post-demote end." + if [[ $rc2 != $OCF_SUCCESS ]] ; then + ocf_log err "${LH} RMQ-server app can't be stopped on post-demote. Master resource is failed" + exit $OCF_FAILED_MASTER + fi + ;; + *) ;; + esac + fi + + return $OCF_SUCCESS +} + + +action_promote() { + local rc=$OCF_ERR_GENERIC + local pid + local LH="${LL} promote:" + + if [[ ${OCF_RESKEY_debug} == "true" ]] ; then + d=$(date '+%Y%m%d %H:%M:%S') + echo $d >> /tmp/rmq-promote.log + env >> /tmp/rmq-promote.log + echo "$d [promote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log + fi + + ocf_log info "${LH} action begin." + + get_monitor + rc=$? + ocf_log info "${LH} get_monitor returns ${rc}" + case "$rc" in + "$OCF_SUCCESS") + # Running as slave. Normal, expected behavior. + ocf_log info "${LH} Resource is currently running as Slave" + # rabbitmqctl start_app if need + get_status rabbit + rc=$? + ocf_log info "${LH} Updating cluster master attribute" + ocf_run crm_attribute -N `hostname` -l reboot --name 'rabbit-master' --update 'true' + if [[ $rc != $OCF_SUCCESS ]] ; then + ocf_log info "${LH} RMQ app is not started. Starting..." + start_rmq_server_app + rc=$? + if [[ $rc == 0 ]] ; then + try_to_start_rmq_app + rc=$? + if [[ $rc != 0 ]] ; then + ocf_log err "${LH} Can't start RMQ app. Master resource is failed." + ocf_log info "${LH} action end." + exit $OCF_FAILED_MASTER + fi + ocf_log info "${LH} Setting HA policy for all queues" + rabbitmqctl set_policy ha-all "." '{"ha-mode":"all", "ha-sync-mode":"automatic"}' --apply-to all --priority 0 + rabbitmqctl set_policy heat_rpc_expire "^heat-engine-listener\\." '{"expires":3600000,"ha-mode":"all","ha-sync-mode":"automatic"}' --apply-to all --priority 1 + rabbitmqctl set_policy results_expire "^results\\." '{"expires":3600000,"ha-mode":"all","ha-sync-mode":"automatic"}' --apply-to all --priority 1 + rabbitmqctl set_policy tasks_expire "^tasks\\." '{"expires":3600000,"ha-mode":"all","ha-sync-mode":"automatic"}' --apply-to all --priority 1 + # create timestamp file + ocf_log info "${LH} Updating start timestamp" + ocf_run crm_attribute -N `hostname` -l reboot --name 'rabbit-start-time' --update $(now) + ocf_log info "${LH} Checking master status" + get_monitor + rc=$? + ocf_log info "${LH} Master status is $rc" + if [ $rc == $OCF_RUNNING_MASTER ] + then + rc=$OCF_SUCCESS + else + ocf_log err "${LH} Master resource is failed." + ocf_log info "${LH} action end." 
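# On promote the agent installs queue mirroring policies via "rabbitmqctl set_policy"
# (see above); whether they actually took effect can be verified on the master with a
# read-only query, e.g.:
: <<'EXAMPLE_POLICIES'
rabbitmqctl list_policies    # expect ha-all plus heat_rpc_expire/results_expire/tasks_expire
EXAMPLE_POLICIES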
+ exit $OCF_FAILED_MASTER + fi + else + ocf_log err "${LH} Can't start RMQ-runtime." + rc=$OCF_ERR_GENERIC + fi + fi + return $rc + ;; + "$OCF_RUNNING_MASTER") + # Already a master. Unexpected, but not a problem. + ocf_log warn "${LH} Resource is already running as Master" + rc=$OCF_SUCCESS + ;; + + "$OCF_FAILED_MASTER") + # Master failed. + ocf_log err "${LH} Master resource is failed and not running" + ocf_log info "${LH} action end." + exit $OCF_FAILED_MASTER + ;; + + "$OCF_NOT_RUNNING") + # Currently not running. + ocf_log err "${LH} Resource is currently not running" + rc=$OCF_NOT_RUNNING + ;; + *) + # Failed resource. Let the cluster manager recover. + ocf_log err "${LH} Unexpected error, cannot promote" + ocf_log info "${LH} action end." + exit $rc + ;; + esac + + # transform slave RMQ-server to master + + ocf_log info "${LH} action end." + return $rc +} + + +action_demote() { + local rc=$OCF_ERR_GENERIC + local pid + local LH="${LL} demote:" + + if [[ ${OCF_RESKEY_debug} == "true" ]] ; then + d=`date '+%Y%m%d %H:%M:%S'` + echo $d >> /tmp/rmq-demote.log + env >> /tmp/rmq-demote.log + echo "$d [demote] start='${OCF_RESKEY_CRM_meta_notify_start_uname}' stop='${OCF_RESKEY_CRM_meta_notify_stop_uname}' active='${OCF_RESKEY_CRM_meta_notify_active_uname}' inactive='${OCF_RESKEY_CRM_meta_notify_inactive_uname}'" >> /tmp/rmq-ocf.log + + fi + + ocf_log info "${LH} action begin." + + get_monitor + rc=$? + case "$rc" in + "$OCF_RUNNING_MASTER") + # Running as master. Normal, expected behavior. + ocf_log warn "${LH} Resource is currently running as Master" + stop_rmq_server_app + rc=$? + crm_attribute -N `hostname` -l reboot --name 'rabbit-master' --delete + crm_attribute -N `hostname` -l reboot --name 'rabbit-start-time' --delete + ;; + "$OCF_SUCCESS") + # Alread running as slave. Nothing to do. + ocf_log warn "${LH} Resource is currently running as Slave" + rc=$OCF_SUCCESS + ;; + "$OCF_FAILED_MASTER") + # Master failed and being demoted. + ocf_log err "${LH} Demoting of a failed Master." + ocf_log info "${LH} action end." + exit $OCF_FAILED_MASTER + ;; + "$OCF_NOT_RUNNING") + ocf_log warn "${LH} Try to demote currently not running resource. Nothing to do." + rc=$OCF_SUCCESS + ;; + "$OCF_ERR_GENERIC") + ocf_log err "${LH} Error while demote. Stopping resource." + action_stop + rc=$? + ;; + *) + # Failed resource. Let the cluster manager recover. + ocf_log err "${LH} Unexpected error, cannot demote" + ocf_log info "${LH} action end." + exit $rc + ;; + esac + + # transform master RMQ-server to slave + ocf_log info "${LH} action end." + return $rc +} +####################################################################### + +rmq_setup_env + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +action_validate || exit $? + +# What kind of method was invoked? 
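# For context: this script is installed as the "rabbitmq-server" agent under the
# "fuel" OCF provider (see the spec file at the end of this patch) and is meant to
# run as a notifying master/slave clone. A rough, illustrative crm configuration;
# the resource names, timeouts and intervals below are assumptions, not values
# shipped by this patch:
: <<'EXAMPLE_CRM'
crm configure primitive p_rabbitmq-server ocf:fuel:rabbitmq-server \
  op monitor interval=30 timeout=60 \
  op monitor interval=27 role=Master timeout=60 \
  op start timeout=120 op stop timeout=60
crm configure ms master_p_rabbitmq-server p_rabbitmq-server \
  meta notify=true ordered=false interleave=true master-max=1 master-node-max=1
EXAMPLE_CRM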
+case "$1" in + start) action_start;; + stop) action_stop;; + status) action_status;; + monitor) action_monitor;; + validate) action_validate;; + promote) action_promote;; + demote) action_demote;; + notify) action_notify;; + validate-all) action_validate;; + *) usage;; +esac +### diff --git a/files/fuel-ha-utils/tools/clustercheck b/files/fuel-ha-utils/tools/clustercheck new file mode 100644 index 0000000000..580b864472 --- /dev/null +++ b/files/fuel-ha-utils/tools/clustercheck @@ -0,0 +1,76 @@ +#!/bin/bash +# +# Script to make a proxy (ie HAProxy) capable of monitoring Percona XtraDB Cluster nodes properly +# +# Author: Olaf van Zandwijk +# Author: Raghavendra Prabhu +# +# Documentation and download: https://github.com/olafz/percona-clustercheck +# +# Based on the original script from Unai Rodriguez +# + +if [[ $1 == '-h' || $1 == '--help' ]];then + echo "Usage: $0 " + exit +fi + +[ -f /etc/wsrepclustercheckrc ] && . /etc/wsrepclustercheckrc + +if [[ -r $DEFAULTS_EXTRA_FILE ]];then + MYSQL_CMDLINE="mysql --defaults-extra-file=$DEFAULTS_EXTRA_FILE -nNE --connect-timeout=$TIMEOUT \ + --user=${MYSQL_USERNAME} --password=${MYSQL_PASSWORD} -h ${MYSQL_HOST} -P ${MYSQL_PORT}" +else + MYSQL_CMDLINE="mysql -nNE --connect-timeout=$TIMEOUT --user=${MYSQL_USERNAME} --password=${MYSQL_PASSWORD} \ + -h ${MYSQL_HOST} -P ${MYSQL_PORT}" +fi +# +# Perform the query to check the wsrep_local_state +# +WSREP_STATUS=$($MYSQL_CMDLINE -e "SHOW STATUS LIKE 'wsrep_local_state';" \ + 2>${ERR_FILE} | tail -1 2>>${ERR_FILE}) + +if [[ "${WSREP_STATUS}" == "4" ]] || [[ "${WSREP_STATUS}" == "2" && ${AVAILABLE_WHEN_DONOR} == 1 ]] +then + # Check only when set to 0 to avoid latency in response. + if [[ $AVAILABLE_WHEN_READONLY -eq 0 ]];then + READ_ONLY=$($MYSQL_CMDLINE -e "SHOW GLOBAL VARIABLES LIKE 'read_only';" \ + 2>${ERR_FILE} | tail -1 2>>${ERR_FILE}) + + if [[ "${READ_ONLY}" == "ON" ]];then + # Percona XtraDB Cluster node local state is 'Synced', but it is in + # read-only mode. The variable AVAILABLE_WHEN_READONLY is set to 0. + # => return HTTP 503 + # Shell return-code is 1 + echo -en "HTTP/1.1 503 Service Unavailable\r\n" + echo -en "Content-Type: text/plain\r\n" + echo -en "Connection: close\r\n" + echo -en "Content-Length: 43\r\n" + echo -en "\r\n" + echo -en "Percona XtraDB Cluster Node is read-only.\r\n" + sleep 0.1 + exit 1 + fi + fi + # Percona XtraDB Cluster node local state is 'Synced' => return HTTP 200 + # Shell return-code is 0 + echo -en "HTTP/1.1 200 OK\r\n" + echo -en "Content-Type: text/plain\r\n" + echo -en "Connection: close\r\n" + echo -en "Content-Length: 40\r\n" + echo -en "\r\n" + echo -en "Percona XtraDB Cluster Node is synced.\r\n" + sleep 0.1 + exit 0 +else + # Percona XtraDB Cluster node local state is not 'Synced' => return HTTP 503 + # Shell return-code is 1 + echo -en "HTTP/1.1 503 Service Unavailable\r\n" + echo -en "Content-Type: text/plain\r\n" + echo -en "Connection: close\r\n" + echo -en "Content-Length: 44\r\n" + echo -en "\r\n" + echo -en "Percona XtraDB Cluster Node is not synced.\r\n" + sleep 0.1 + exit 1 +fi diff --git a/files/fuel-ha-utils/tools/q-agent-cleanup.py b/files/fuel-ha-utils/tools/q-agent-cleanup.py new file mode 100644 index 0000000000..fb863b850a --- /dev/null +++ b/files/fuel-ha-utils/tools/q-agent-cleanup.py @@ -0,0 +1,645 @@ +#!/usr/bin/env python +# Copyright 2013 - 2015 Mirantis, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import argparse +from ConfigParser import SafeConfigParser +import functools +import json +import logging +import logging.config +import logging.handlers +import re +import socket +import StringIO +import subprocess +import sys +from time import sleep + +from neutronclient.neutron import client as n_client + +LOG_NAME = 'q-agent-cleanup' + +API_VER = '2.0' +PORT_ID_PART_LEN = 11 + + +def make_logger(handler=logging.StreamHandler(sys.stdout), level=logging.INFO): + format = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + handler.setFormatter(format) + logger = logging.getLogger(LOG_NAME) + logger.addHandler(handler) + logger.setLevel(level) + return logger + +LOG = make_logger() + +AUTH_KEYS = { + 'tenant_name': 'admin_tenant_name', + 'username': 'admin_user', + 'password': 'admin_password', + 'auth_url': 'auth_uri', +} + + +def get_auth_data(cfg_file, section='keystone_authtoken', keys=AUTH_KEYS): + cfg = SafeConfigParser() + with open(cfg_file) as f: + cfg.readfp(f) + auth_data = {} + for key, value in keys.iteritems(): + auth_data[key] = cfg.get(section, value) + return auth_data + +# Note(xarses): be careful not to inject \n's into the regex pattern +# or it will case the maching to fail +RECOVERABLE = re.compile(( + '(HTTP\s+400\))|' + '(400-\{\'message\'\:\s+\'\'\})|' + '(\[Errno 111\]\s+Connection\s+refused)|' + '(503\s+Service\s+Unavailable)|' + '(504\s+Gateway\s+Time-out)|' + '(\:\s+Maximum\s+attempts\s+reached)|' + '(Unauthorized\:\s+bad\s+credentials)|' + '(Max\s+retries\s+exceeded)|' + """('*NoneType'*\s+object\s+ha'\s+no\s+attribute\s+'*__getitem__'*$)|""" + '(No\s+route\s+to\s+host$)|' + '(Lost\s+connection\s+to\s+MySQL\s+server)'), flags=re.M) + +RETRY_COUNT = 50 +RETRY_DELAY = 2 + + +def retry(func, pattern=RECOVERABLE): + @functools.wraps(func) + def wrapper(*args, **kwargs): + i = 0 + while True: + try: + return func(*args, **kwargs) + except Exception as e: + if pattern and not pattern.match(e.message): + raise e + i += 1 + if i >= RETRY_COUNT: + raise e + print("retry request {0}: {1}".format(i, e)) + sleep(RETRY_DELAY) + return wrapper + + +class NeutronCleaner(object): + PORT_NAME_PREFIXES_BY_DEV_OWNER = { + 'network:dhcp': 'tap', + 'network:router_gateway': 'qg-', + 'network:router_interface': 'qr-', + } + PORT_NAME_PREFIXES = { + # contains tuples of prefixes + 'dhcp': (PORT_NAME_PREFIXES_BY_DEV_OWNER['network:dhcp'],), + 'l3': ( + PORT_NAME_PREFIXES_BY_DEV_OWNER['network:router_gateway'], + PORT_NAME_PREFIXES_BY_DEV_OWNER['network:router_interface'] + ) + } + BRIDGES_FOR_PORTS_BY_AGENT = { + 'dhcp': ('br-int',), + 'l3': ('br-int', 'br-ex'), + } + PORT_OWNER_PREFIXES = { + 'dhcp': ('network:dhcp',), + 'l3': ('network:router_gateway', 'network:router_interface') + } + NS_NAME_PREFIXES = { + 'dhcp': 'qdhcp', + 'l3': 'qrouter', + } + AGENT_BINARY_NAME = { + 'dhcp': 'neutron-dhcp-agent', + 'l3': 'neutron-l3-agent', + 'ovs': 'neutron-openvswitch-agent' + } + + CMD__list_ovs_port = ['ovs-vsctl', 'list-ports'] + CMD__remove_ovs_port = ['ovs-vsctl', '--', '--if-exists', 'del-port'] + CMD__remove_ip_addr = ['ip', 'address', 'delete'] + 
CMD__ip_netns_list = ['ip', 'netns', 'list'] + CMD__ip_netns_exec = ['ip', 'netns', 'exec'] + + # 14: tap-xxxyyyzzz: + RE__port_in_portlist = re.compile(r"^\s*\d+\:\s+([\w-]+)\:") + + def __init__(self, options, log=None): + self.log = log + self.auth_data = get_auth_data(cfg_file=options.get('authconf')) + self.options = options + self.agents = {} + self.debug = options.get('debug') + self.RESCHEDULING_CALLS = { + 'dhcp': self._reschedule_agent_dhcp, + 'l3': self._reschedule_agent_l3, + } + + self._client = None + + @property + @retry + def client(self): + if self._client is None: + self._client = n_client.Client(API_VER, **self.auth_data) + return self._client + + @retry + def _get_agents(self, use_cache=True): + return self.client.list_agents()['agents'] + + @retry + def _get_routers(self, use_cache=True): + return self.client.list_routers()['routers'] + + @retry + def _get_networks(self, use_cache=True): + return self.client.list_networks()['networks'] + + @retry + def _list_networks_on_dhcp_agent(self, agent_id): + return self.client.list_networks_on_dhcp_agent( + agent_id)['networks'] + + @retry + def _list_routers_on_l3_agent(self, agent_id): + return self.client.list_routers_on_l3_agent( + agent_id)['routers'] + + @retry + def _list_l3_agents_on_router(self, router_id): + return self.client.list_l3_agent_hosting_routers( + router_id)['agents'] + + @retry + def _list_dhcp_agents_on_network(self, network_id): + return self.client.list_dhcp_agent_hosting_networks( + network_id)['agents'] + + def _list_orphaned_networks(self): + networks = self._get_networks() + self.log.debug( + "_list_orphaned_networks:, got list of networks {0}".format( + json.dumps(networks, indent=4))) + orphaned_networks = [] + for network in networks: + if len(self._list_dhcp_agents_on_network(network['id'])) == 0: + orphaned_networks.append(network['id']) + self.log.debug( + "_list_orphaned_networks:, got list of orphaned networks {0}". 
+ format(orphaned_networks)) + return orphaned_networks + + def _list_orphaned_routers(self): + routers = self._get_routers() + self.log.debug( + "_list_orphaned_routers:, got list of routers {0}".format( + json.dumps(routers, indent=4))) + orphaned_routers = [] + for router in routers: + if len(self._list_l3_agents_on_router(router['id'])) == 0: + orphaned_routers.append(router['id']) + self.log.debug( + "_list_orphaned_routers:, got list of orphaned routers {0}".format( + orphaned_routers)) + return orphaned_routers + + @retry + def _add_network_to_dhcp_agent(self, agent_id, net_id): + return self.client.add_network_to_dhcp_agent( + agent_id, {"network_id": net_id}) + + @retry + def _add_router_to_l3_agent(self, agent_id, router_id): + return self.client.add_router_to_l3_agent( + agent_id, {"router_id": router_id}) + + @retry + def _remove_router_from_l3_agent(self, agent_id, router_id): + return self.client.remove_router_from_l3_agent( + agent_id, router_id) + + @retry + def _delete_agent(self, agent_id): + return self.client.delete_agent(agent_id) + + def _get_agents_by_type(self, agent, use_cache=True): + self.log.debug("_get_agents_by_type: start.") + rv = self.agents.get(agent, []) if use_cache else [] + if not rv: + agents = self._get_agents(use_cache=use_cache) + for i in agents: + if i['binary'] == self.AGENT_BINARY_NAME.get(agent): + rv.append(i) + from_cache = '' + else: + from_cache = ' from local cache' + self.log.debug( + "_get_agents_by_type: end, {0} rv: {1}".format( + from_cache, json.dumps(rv, indent=4))) + return rv + + def __collect_namespaces_for_agent(self, agent): + cmd = self.CMD__ip_netns_list[:] + self.log.debug("Execute command '{0}'".format(' '.join(cmd))) + process = subprocess.Popen( + cmd, + shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + rc = process.wait() + if rc != 0: + self.log.error( + "ERROR (rc={0}) while execution {1}".format( + rc, ' '.join(cmd))) + return [] + # filter namespaces by given agent type + netns = [] + stdout = process.communicate()[0] + for ns in StringIO.StringIO(stdout): + ns = ns.strip() + self.log.debug("Found network namespace '{0}'".format(ns)) + if ns.startswith(self.NS_NAME_PREFIXES[agent]): + netns.append(ns) + return netns + + def __collect_ports_for_namespace(self, ns): + cmd = self.CMD__ip_netns_exec[:] + cmd.extend([ns, 'ip', 'l', 'show']) + self.log.debug("Execute command '{0}'".format(' '.join(cmd))) + process = subprocess.Popen( + cmd, + shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + rc = process.wait() + if rc != 0: + self.log.error( + "ERROR (rc={0}) while execution {1}".format( + rc, ' '.join(cmd))) + return [] + ports = [] + stdout = process.communicate()[0] + for line in StringIO.StringIO(stdout): + pp = self.RE__port_in_portlist.match(line) + if not pp: + continue + port = pp.group(1) + if port != 'lo': + self.log.debug("Found port '{0}'".format(port)) + ports.append(port) + return ports + + def _cleanup_ports(self, agent): + self.log.debug("_cleanup_ports: start.") + + # get namespaces list + netns = self.__collect_namespaces_for_agent(agent) + + # collect ports from namespace + ports = [] + for ns in netns: + ports.extend(self.__collect_ports_for_namespace(ns)) + + # iterate by port_list and remove port from OVS + for port in ports: + cmd = self.CMD__remove_ovs_port[:] + cmd.append(port) + if self.options.get('noop'): + self.log.info("NOOP-execution: '{0}'".format(' '.join(cmd))) + else: + self.log.debug("Execute command '{0}'".format(' '.join(cmd))) + process = 
subprocess.Popen( + cmd, + shell=False, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + rc = process.wait() + if rc != 0: + self.log.error( + "ERROR (rc={0}) while execution {1}".format( + rc, ' '.join(cmd))) + self.log.debug("_cleanup_ports: end.") + + return True + + def _reschedule_agent_dhcp(self, agent_type): + self.log.debug("_reschedule_agent_dhcp: start.") + agents = { + 'alive': [], + 'dead': [] + } + # collect networklist from dead DHCP-agents + dead_networks = [] + for agent in self._get_agents_by_type(agent_type): + if agent['alive']: + self.log.info( + "found alive DHCP agent: {0}".format(agent['id'])) + agents['alive'].append(agent) + else: + # dead agent + self.log.info( + "found dead DHCP agent: {0}".format(agent['id'])) + agents['dead'].append(agent) + for net in self._list_networks_on_dhcp_agent(agent['id']): + dead_networks.append(net) + + if dead_networks and agents['alive']: + # get network-ID list of already attached to alive agent networks + lucky_ids = set() + map( + lambda net: lucky_ids.add(net['id']), + self._list_networks_on_dhcp_agent(agents['alive'][0]['id']) + ) + # add dead networks to alive agent + for net in dead_networks: + if net['id'] not in lucky_ids: + # attach network to agent + self.log.info( + "attach network {net} to DHCP agent {agent}".format( + net=net['id'], + agent=agents['alive'][0]['id'])) + if not self.options.get('noop'): + self._add_network_to_dhcp_agent( + agents['alive'][0]['id'], net['id']) + + # remove dead agents if need (and if found alive agent) + if self.options.get('remove-dead'): + for agent in agents['dead']: + self.log.info( + "remove dead DHCP agent: {0}".format(agent['id'])) + if not self.options.get('noop'): + self._delete_agent(agent['id']) + orphaned_networks = self._list_orphaned_networks() + self.log.info("_reschedule_agent_dhcp: rescheduling orphaned networks") + if orphaned_networks and agents['alive']: + for network in orphaned_networks: + self.log.info( + "_reschedule_agent_dhcp: rescheduling {0} to {1}".format( + network, agents['alive'][0]['id'])) + if not self.options.get('noop'): + self._add_network_to_dhcp_agent( + agents['alive'][0]['id'], network) + self.log.info( + "_reschedule_agent_dhcp: ended rescheduling of orphaned networks") + self.log.debug("_reschedule_agent_dhcp: end.") + + def _reschedule_agent_l3(self, agent_type): + self.log.debug("_reschedule_agent_l3: start.") + agents = { + 'alive': [], + 'dead': [] + } + # collect router-list from dead DHCP-agents + dead_routers = [] # array of tuples (router, agentID) + for agent in self._get_agents_by_type(agent_type): + if agent['alive']: + self.log.info("found alive L3 agent: {0}".format(agent['id'])) + agents['alive'].append(agent) + else: + # dead agent + self.log.info("found dead L3 agent: {0}".format(agent['id'])) + agents['dead'].append(agent) + map( + lambda rou: dead_routers.append((rou, agent['id'])), + self._list_routers_on_l3_agent(agent['id']) + ) + self.log.debug( + "L3 agents in cluster: {0}".format( + json.dumps(agents, indent=4))) + self.log.debug("Routers, attached to dead L3 agents: {0}".format( + json.dumps(dead_routers, indent=4))) + + if dead_routers and agents['alive']: + # get router-ID list of already attached to alive agent routerss + lucky_ids = set() + map( + lambda rou: lucky_ids.add(rou['id']), + self._list_routers_on_l3_agent(agents['alive'][0]['id']) + ) + # remove dead agents after rescheduling + for agent in agents['dead']: + self.log.info("remove dead L3 agent: {0}".format(agent['id'])) + if not 
self.options.get('noop'): + self._delete_agent(agent['id']) + # move routers from dead to alive agent + for rou in filter( + lambda rr: not(rr[0]['id'] in lucky_ids), dead_routers): + self.log.info( + "schedule router {0} to L3 agent {1}".format( + rou[0]['id'], + agents['alive'][0]['id'])) + if not self.options.get('noop'): + self._add_router_to_l3_agent( + agents['alive'][0]['id'], rou[0]['id']) + + orphaned_routers = self._list_orphaned_routers() + self.log.info("_reschedule_agent_l3: rescheduling orphaned routers") + if orphaned_routers and agents['alive']: + for router in orphaned_routers: + self.log.info( + "_reschedule_agent_l3: rescheduling {0} to {1}".format( + router, agents['alive'][0]['id'])) + if not self.options.get('noop'): + self._add_router_to_l3_agent( + agents['alive'][0]['id'], router) + self.log.info( + "_reschedule_agent_l3: ended rescheduling of orphaned routers") + self.log.debug("_reschedule_agent_l3: end.") + + def _remove_self(self, agent_type): + self.log.debug("_remove_self: start.") + for agent in self._get_agents_by_type(agent_type): + if agent['host'] == socket.gethostname(): + self.log.info( + "_remove_self: deleting our own agent {0} of type {1}". + format(agent['id'], agent_type)) + if not self.options.get('noop'): + self._delete_agent(agent['id']) + self.log.debug("_remove_self: end.") + + def _reschedule_agent(self, agent): + self.log.debug("_reschedule_agents: start.") + task = self.RESCHEDULING_CALLS.get(agent, None) + if task: + task(agent) + self.log.debug("_reschedule_agents: end.") + + def do(self, agent): + if self.options.get('cleanup-ports'): + self._cleanup_ports(agent) + if self.options.get('reschedule'): + self._reschedule_agent(agent) + if self.options.get('remove-self'): + self._remove_self(agent) + + def _test_healthy(self, agent_list, hostname): + rv = False + for agent in agent_list: + if agent['host'] == hostname and agent['alive']: + return True + return rv + + def test_healthy(self, agent_type): + # OCF_FAILED_MASTER, + # http://www.linux-ha.org/doc/dev-guides/_literal_ocf_failed_master_literal_9.html + + rc = 9 + agentlist = self._get_agents_by_type(agent_type) + for hostname in self.options.get('test-hostnames'): + if self._test_healthy(agentlist, hostname): + return 0 + return rc + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Neutron network node cleaning tool.') + parser.add_argument( + "-c", + "--auth-config", + dest="authconf", + default="/etc/neutron/neutron.conf", + help="Read authconfig from service file", + metavar="FILE") + parser.add_argument( + "-t", + "--auth-token", + dest="auth-token", + default=None, + help="Authenticating token (instead username/passwd)", + metavar="TOKEN") + parser.add_argument( + "-u", + "--admin-auth-url", + dest="admin-auth-url", + default=None, + help="Authenticating URL (admin)", + metavar="URL") + parser.add_argument( + "--retries", + dest="retries", + type=int, + default=50, + help="try NN retries for API call", + metavar="NN") + parser.add_argument( + "--sleep", + dest="sleep", + type=int, + default=2, + help="sleep seconds between retries", + metavar="SEC") + parser.add_argument( + "-a", + "--agent", + dest="agent", + action="append", + help="specyfy agents for cleaning", + required=True) + parser.add_argument( + "--cleanup-ports", + dest="cleanup-ports", + action="store_true", + default=False, + help="cleanup ports for given agents on this node") + parser.add_argument( + "--remove-self", + dest="remove-self", + action="store_true", + default=False, + 
help="remove ourselves from agent list") + parser.add_argument( + "--activeonly", + dest="activeonly", + action="store_true", + default=False, + help="cleanup only active ports") + parser.add_argument( + "--reschedule", + dest="reschedule", + action="store_true", + default=False, + help="reschedule given agents") + parser.add_argument( + "--remove-dead", + dest="remove-dead", + action="store_true", + default=False, + help="remove dead agents while rescheduling") + parser.add_argument( + "--test-alive-for-hostname", + dest="test-hostnames", + action="append", + help="testing agent's healthy for given hostname") + parser.add_argument( + "--external-bridge", + dest="external-bridge", + default="br-ex", + help="external bridge name", + metavar="IFACE") + parser.add_argument( + "--integration-bridge", + dest="integration-bridge", + default="br-int", + help="integration bridge name", + metavar="IFACE") + parser.add_argument( + "-l", + "--log", + dest="log", + action="store", + help="log to file instead of STDOUT") + parser.add_argument( + "--noop", + dest="noop", + action="store_true", + default=False, + help="do not execute, print to log instead") + parser.add_argument( + "--debug", + dest="debug", + action="store_true", + default=False, + help="debug") + args = parser.parse_args() + RETRY_COUNT = args.retries + RETRY_DELAY = args.sleep + + # setup logging + if args.log: + LOG = make_logger( + handler=logging.handlers.WatchedFileHandler(args.log)) + + if args.debug: + LOG.setLevel(logging.DEBUG) + + LOG.info("Started: {0}".format(' '.join(sys.argv))) + cleaner = NeutronCleaner(options=vars(args), log=LOG) + rc = 0 + if vars(args).get('test-hostnames'): + rc = cleaner.test_healthy(args.agent[0]) + else: + for i in args.agent: + cleaner.do(i) + LOG.debug("End.") + sys.exit(rc) diff --git a/files/fuel-ha-utils/tools/wsrepclustercheckrc b/files/fuel-ha-utils/tools/wsrepclustercheckrc new file mode 100644 index 0000000000..46a3a17bc0 --- /dev/null +++ b/files/fuel-ha-utils/tools/wsrepclustercheckrc @@ -0,0 +1,12 @@ + +MYSQL_USERNAME="clustercheck" +MYSQL_PASSWORD="status" +MYSQL_HOST="127.0.0.1" +MYSQL_PORT="3307" +AVAILABLE_WHEN_DONOR=${3:-1} +ERR_FILE="${4:-/dev/null}" +AVAILABLE_WHEN_READONLY=${5:-1} +DEFAULTS_EXTRA_FILE=${6:-/etc/my.cnf} + +#Timeout exists for instances where mysqld may be hung +TIMEOUT=10 diff --git a/files/fuel-misc/centos_ifdown-local b/files/fuel-misc/centos_ifdown-local new file mode 100644 index 0000000000..e0b3b41ea1 --- /dev/null +++ b/files/fuel-misc/centos_ifdown-local @@ -0,0 +1,5 @@ +#!/bin/sh +SCRIPT="/etc/sysconfig/network-scripts/interface-down-script-$1" +if [[ -x $SCRIPT ]] ; then +. $SCRIPT +fi diff --git a/files/fuel-misc/centos_ifup-local b/files/fuel-misc/centos_ifup-local new file mode 100644 index 0000000000..9b4011598b --- /dev/null +++ b/files/fuel-misc/centos_ifup-local @@ -0,0 +1,5 @@ +#!/bin/sh +SCRIPT="/etc/sysconfig/network-scripts/interface-up-script-$1" +if [[ -x $SCRIPT ]] ; then +. 
$SCRIPT +fi diff --git a/deployment/puppet/haproxy/templates/haproxy-status.sh.erb b/files/fuel-misc/haproxy-status.sh similarity index 91% rename from deployment/puppet/haproxy/templates/haproxy-status.sh.erb rename to files/fuel-misc/haproxy-status.sh index 867943e08e..3099a3a97c 100644 --- a/deployment/puppet/haproxy/templates/haproxy-status.sh.erb +++ b/files/fuel-misc/haproxy-status.sh @@ -1,7 +1,7 @@ #!/bin/sh show_stats() { - echo 'show stat' | socat 'UNIX-CONNECT:<%= @haproxy_socket %>' STDIO | awk \ + echo 'show stat' | socat 'UNIX-CONNECT:/var/lib/haproxy/stats' STDIO | awk \ ' function fillstr(string, num) { diff --git a/specs/fuel-library6.1.spec b/specs/fuel-library6.1.spec new file mode 100644 index 0000000000..c87306df5a --- /dev/null +++ b/specs/fuel-library6.1.spec @@ -0,0 +1,168 @@ +Summary: Fuel-Library: a set of deployment manifests of Fuel for OpenStack +Name: fuel-library6.1 +#FIXME(aglarendil): this is a temp change for scripts compatibility. Should be bumped everywhere +Version: 6.0.0 +Release: 1 +Group: System Environment/Libraries +License: GPLv2 +URL: http://github.com/stackforge/fuel-library +Source0: %{name}-%{version}.tar.gz +Provides: fuel-library +BuildArch: noarch +BuildRoot: %{_tmppath}/fuel-library-%{version}-%{release} + +%define files_source %{_builddir}/%{name}-%{version}/files +%define dockerctl_source %{files_source}/fuel-docker-utils +%define openstack_version 2014.2-6.1 + +%description + +Fuel is the Ultimate Do-it-Yourself Kit for OpenStack +Purpose built to assimilate the hard-won experience of our services team, it contains the tooling, information, and support you need to accelerate time to production with OpenStack cloud. OpenStack is a very versatile and flexible cloud management platform. By exposing its portfolio of cloud infrastructure services – compute, storage, networking and other core resources — through ReST APIs, it enables a wide range of control over these services, both from the perspective of an integrated Infrastructure as a Service (IaaS) controlled by applications, as well as automated manipulation of the infrastructure itself. This architectural flexibility doesn’t set itself up magically; it asks you, the user and cloud administrator, to organize and manage a large array of configuration options. Consequently, getting the most out of your OpenStack cloud over time – in terms of flexibility, scalability, and manageability – requires a thoughtful combination of automation and configuration choices. + +This package contains deployment manifests and code to execute provisioning of master and slave nodes. 
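The Provides: fuel-library tag above keeps the unversioned package name resolvable for
dependencies and scripts that still refer to it. Once built, the resulting package can be
inspected with rpm; the file name below simply follows the Name/Version/Release/BuildArch
tags declared above:

  rpm -qp --provides fuel-library6.1-6.0.0-1.noarch.rpm
  rpm -qp --list fuel-library6.1-6.0.0-1.noarch.rpm | head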
+ +%prep +%setup -cq + +%install +mkdir -p %{buildroot}/etc/puppet/%{openstack_version}/modules/ +mkdir -p %{buildroot}/etc/puppet/%{openstack_version}/manifests/ +mkdir -p %{buildroot}/etc/profile.d/ +mkdir -p %{buildroot}/etc/dockerctl +mkdir -p %{buildroot}/usr/bin/ +mkdir -p %{buildroot}/usr/lib/ +mkdir -p %{buildroot}/usr/share/dockerctl +mkdir -p %{buildroot}/sbin/ +cp -fr %{_builddir}/%{name}-%{version}/deployment/puppet/* %{buildroot}/etc/puppet/%{openstack_version}/modules/ +#FUEL DOCKERCTL UTILITY +install -m 0644 %{dockerctl_source}/dockerctl-alias.sh %{buildroot}/etc/profile.d/dockerctl.sh +install -m 0755 %{dockerctl_source}/dockerctl %{buildroot}/usr/bin +install -m 0755 %{dockerctl_source}/get_service_credentials.py %{buildroot}/usr/bin +install -m 0644 %{dockerctl_source}/dockerctl_config %{buildroot}/etc/dockerctl/config +install -m 0644 %{dockerctl_source}/functions.sh %{buildroot}/usr/share/dockerctl/ +#fuel-misc +install -m 0755 %{files_source}/fuel-misc/centos_ifdown-local %{buildroot}/sbin/ifup-local +install -m 0755 %{files_source}/fuel-misc/centos_ifup-local %{buildroot}/sbin/ifdown-local +install -m 0755 %{files_source}/fuel-misc/haproxy-status.sh %{buildroot}/usr/bin/haproxy-status +#fuel-ha-utils +install -d -m 0755 %{buildroot}/usr/lib/ocf/resource.d/fuel +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ns_haproxy %{buildroot}/usr/lib/ocf/resource.d/fuel/ns_haproxy +install -m 0755 %{files_source}/fuel-ha-utils/ocf/mysql-wss %{buildroot}/usr/lib/ocf/resource.d/fuel/mysql-wss +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ns_dns %{buildroot}/usr/lib/ocf/resource.d/fuel/ns_dns +install -m 0755 %{files_source}/fuel-ha-utils/ocf/heat_engine_centos %{buildroot}/usr/lib/ocf/resource.d/fuel/heat-engine +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ns_ntp %{buildroot}/usr/lib/ocf/resource.d/fuel/ns_ntp +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-ovs-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-ovs-agent +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-metadata-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-metadata-agent +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-dhcp-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-dhcp-agent +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ocf-neutron-l3-agent %{buildroot}/usr/lib/ocf/resource.d/fuel/ocf-neutron-l3-agent +install -m 0755 %{files_source}/fuel-ha-utils/ocf/rabbitmq %{buildroot}/usr/lib/ocf/resource.d/fuel/rabbitmq-server +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ns_IPaddr2 %{buildroot}/usr/lib/ocf/resource.d/fuel/ns_IPaddr2 +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-agent-central %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-agent-central +install -m 0755 %{files_source}/fuel-ha-utils/ocf/ceilometer-alarm-evaluator %{buildroot}/usr/lib/ocf/resource.d/fuel/ceilometer-alarm-evaluator +install -m 0755 %{files_source}/fuel-ha-utils/tools/q-agent-cleanup.py %{buildroot}/usr/bin/q-agent-cleanup.py +install -m 0755 %{files_source}/fuel-ha-utils/tools/clustercheck %{buildroot}/usr/bin/clustercheck +install -m 0644 %{files_source}/fuel-ha-utils/tools/wsrepclustercheckrc %{buildroot}/etc/wsrepclustercheckrc +#FIXME - may be we need to put this also into packages +#install -m 0755 TEMPLATE /usr/local/bin/puppet-pull +#install -m 0755 -d deployment/puppet/sahara/templates /usr/share/sahara/templates +#install -m 0755 deployment/puppet/sahara/create_templates.sh 
/usr/share/sahara/templates/create_templates.sh +#install -m 0755 TEMPLATE /usr/local/bin/swift-rings-rebalance.sh +#install -m 0755 TEMPLATE /usr/local/bin/swift-rings-sync.sh + +%post -p /bin/bash +#Update puppet manifests symlinks to the latest version +for i in modules manifests +do + if [ -L /etc/puppet/${i} ] + then + unlink /etc/puppet/${i} + elif [ -d /etc/puppet/${i} ] + then + mv /etc/puppet/${i} /etc/puppet/${i}.old + fi + ln -s /etc/puppet/%{openstack_version}/${i} /etc/puppet/${i} +done + +%files +/etc/puppet/%{openstack_version}/modules/ +/etc/puppet/%{openstack_version}/manifests/ + +%package -n fuel-dockerctl +Summary: Fuel project utilities for Docker container management tool +Version: 6.1 +Release: 1 +Group: System Environment/Libraries +License: GPLv2 +Provides: fuel-docker-utils +URL: http://github.com/stackforge/fuel-library +BuildArch: noarch +BuildRoot: %{_tmppath}/fuel-library-%{version}-%{release} + +%description -n fuel-dockerctl +This package contains a set of helpers to manage docker containers +during Fuel All-in-One deployment toolkit installation + +%files -n fuel-dockerctl +/etc/profile.d/dockerctl.sh +/usr/bin/dockerctl +/usr/bin/get_service_credentials.py +/usr/share/dockerctl/functions.sh +%config(noreplace) /etc/dockerctl/config + +%package -n fuel-misc +Summary: Fuel project misc utilities +Version: 6.1 +Release: 1 +Group: System Environment/Libraries +License: Apache 2.0 +URL: http://github.com/stackforge/fuel-library +BuildArch: noarch +BuildRoot: %{_tmppath}/fuel-library-%{version}-%{release} + +%description -n fuel-misc +A set of scripts for Fuel deployment utility +For further information go to http://wiki.openstack.org/Fuel + +%files -n fuel-misc + +#fuel-misc +%defattr(-,root,root) +/sbin/ifup-local +/sbin/ifdown-local +/usr/bin/haproxy-status + +%package -n fuel-ha-utils +Summary: Fuel project HA utilities +Version: 6.1 +Release: 1 +Group: System Environment/Libraries +# FIXME(aglarendil): mixed license actually - need to figure out the best option +License: GPLv2 +Requires: python-keystoneclient +Requires: python-neutronclient +URL: http://github.com/stackforge/fuel-library +BuildArch: noarch +BuildRoot: %{_tmppath}/fuel-library-%{version}-%{release} + +%description -n fuel-ha-utils +A set of scripts for Fuel deployment utility HA deployment +For further information go to http://wiki.openstack.org/Fuel + +%files -n fuel-ha-utils +%defattr(-,root,root) +/usr/lib/ocf/resource.d/fuel +/usr/bin/q-agent-cleanup.py +/usr/bin/clustercheck +%config(noreplace) /etc/wsrepclustercheckrc +# + + + + +%clean +rm -rf ${buildroot} + +%changelog +* Tue Sep 10 2013 Vladimir Kuklin - 6.1 +- Create spec
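A rough local build sketch for both packaging flavors; the tarball name follows the
Source0/%setup directives above, and the exact directories to include are an assumption
rather than the project's documented build procedure:

  # RPM side (run on a CentOS build host, from the repository root)
  tar czf ~/rpmbuild/SOURCES/fuel-library6.1-6.0.0.tar.gz deployment files specs
  rpmbuild -bb specs/fuel-library6.1.spec

  # Debian side (run on an Ubuntu/Debian build host, from the repository root)
  dpkg-buildpackage -us -uc -b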