Support stop provisioning mechanism
* add the ability to run commands on remote nodes using SSH; * add the stop_provision action. Implements: blueprint fuel-stop-provision Change-Id: Ibcd588fc3dae5961ea51239cad6f2bdee5f16bbf
This commit is contained in:
parent
2aba323f3a
commit
5c9d8cb355
|
@ -15,6 +15,7 @@ Gem::Specification.new do |s|
|
|||
s.add_dependency 'symboltable', '1.0.2'
|
||||
s.add_dependency 'rest-client', '~> 1.6.7'
|
||||
s.add_dependency 'popen4', '~> 0.1.2'
|
||||
s.add_dependency 'net-ssh-multi', '~> 1.1'
|
||||
|
||||
# Astute as service
|
||||
s.add_dependency 'amqp', '0.9.10'
|
||||
|
|
|
@ -36,6 +36,9 @@ require 'astute/post_deploy_actions'
|
|||
require 'astute/post_deploy_actions/restart_radosgw'
|
||||
require 'astute/post_deploy_actions/update_cluster_hosts_info'
|
||||
require 'astute/post_deploy_actions/upload_cirros_image'
|
||||
require 'astute/ssh'
|
||||
require 'astute/ssh_actions/ssh_erase_nodes'
|
||||
require 'astute/ssh_actions/ssh_hard_reboot'
|
||||
|
||||
# Server
|
||||
require 'astute/server/worker'
|
||||
|
|
|
@ -72,6 +72,8 @@ module Astute
|
|||
conf[:PUPPET_SSH_KEYS] = ['neutron', 'nova', 'ceph', 'mysql'] # name of ssh keys what will be generated
|
||||
#and uploaded to all nodes before deploy
|
||||
conf[:MAX_NODES_PER_CALL] = 50 # how many nodes to deploy in one puppet call
|
||||
conf[:SSH_RETRIES] = 5 # SSH tries to call ssh client before failure
|
||||
conf[:SSH_RETRY_TIMEOUT] = 30 # SSH sleeps for ## sec between retries
|
||||
|
||||
# Server settings
|
||||
conf[:broker_host] = 'localhost'
|
||||
|
|
|
@ -155,6 +155,16 @@ module Astute
|
|||
puppetd.stop_and_disable
|
||||
end
|
||||
|
||||
# Stop provisioning of the given nodes.
#
# Steps, in order:
#   1. run the SshEraseNodes command on every node over SSH;
#   2. remove the nodes via CobblerManager#remove_nodes;
#   3. run the SshHardReboot command on every node over SSH, with a short
#      timeout and a single attempt.
#
# reporter     - handed to Context and CobblerManager
# task_id      - handed to Context
# engine_attrs - handed to CobblerManager
# nodes        - node list handed to Ssh.execute and remove_nodes
#
# Returns the result of the final (reboot) Ssh.execute call.
def stop_provision(reporter, task_id, engine_attrs, nodes)
  Ssh.execute(Context.new(task_id, reporter), nodes, SshEraseNodes.command)
  CobblerManager.new(engine_attrs, reporter).remove_nodes(nodes)

  # Ssh.execute takes timeout and retries positionally. Writing `timeout=5`
  # inside the call would only assign a throwaway local variable, so the
  # values are named here and passed as plain positional arguments.
  reboot_timeout = 5
  reboot_retries = 1
  Ssh.execute(Context.new(task_id, reporter),
              nodes,
              SshHardReboot.command,
              reboot_timeout,
              reboot_retries)
end
|
||||
|
||||
# Delegate to Dump.dump_environment.
#
# Wraps task_id and reporter in a fresh Context, passes it together with
# lastdump to Dump.dump_environment and returns whatever that call returns.
def dump_environment(reporter, task_id, lastdump)
  context = Context.new(task_id, reporter)
  Dump.dump_environment(context, lastdump)
end
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
# Copyright 2013 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
require 'net/ssh/multi'
|
||||
require 'timeout'
|
||||
|
||||
module Astute
  # Runs a shell command on a group of nodes over SSH (Net::SSH::Multi) and
  # retries the nodes that could not be processed.
  class Ssh

    # Run +cmd+ on every node, retrying nodes that stayed inaccessible.
    #
    # ctx     - object responding to #task_id (used in log messages only)
    # nodes   - Array of Hashes; 'slave_name' is the host to connect to and
    #           'uid' is the identifier used in the returned report
    # cmd     - shell command String
    # timeout - seconds allowed for a single attempt (default: 60)
    # retries - maximum number of attempts
    #           (default: Astute.config.SSH_RETRIES)
    #
    # Returns a Hash:
    #   'nodes'              => [{'uid' => ...}, ...] nodes the command ran on
    #   'inaccessible_nodes' => same format; present only when some nodes
    #                           were still inaccessible after the last attempt
    #   'status' / 'error_nodes' => present only when error nodes were reported
    def self.execute(ctx, nodes, cmd, timeout=60, retries=Astute.config.SSH_RETRIES)
      nodes_to_process = nodes.map { |n| n['slave_name'] }

      Astute.logger.debug "Run shell command '#{cmd}' using ssh"
      ready_nodes = []
      error_nodes = []

      retries.times do |i|
        Astute.logger.debug "Run shell command using ssh. Retry #{i}"
        Astute.logger.debug "Affected nodes: #{nodes_to_process}"

        new_ready_nodes, new_error_nodes, nodes_to_process =
          run_remote_command(nodes_to_process, cmd, timeout)
        Astute.logger.debug "Retry result: "\
                            "success nodes: #{new_ready_nodes}, "\
                            "error nodes: #{new_error_nodes}, "\
                            "inaccessible nodes: #{nodes_to_process}"

        ready_nodes += new_ready_nodes
        error_nodes += new_error_nodes

        break if nodes_to_process.empty?

        # Pause only between attempts: sleeping after the final attempt would
        # just delay the report without anything left to retry.
        sleep Astute.config.SSH_RETRY_TIMEOUT if i + 1 < retries
      end

      inaccessible_nodes = nodes_to_process
      nodes_uids = nodes.map { |n| n['uid'] }

      answer = {'nodes' => to_report_format(ready_nodes, nodes)}
      unless inaccessible_nodes.empty?
        answer.merge!({'inaccessible_nodes' => to_report_format(inaccessible_nodes, nodes)})
        Astute.logger.warn "#{ctx.task_id}: Running shell command on nodes #{nodes_uids.inspect} finished " \
                           "with errors. Nodes #{answer['inaccessible_nodes'].inspect} are inaccessible"
      end

      unless error_nodes.empty?
        answer.merge!({'status' => 'error', 'error_nodes' => to_report_format(error_nodes, nodes)})

        Astute.logger.error "#{ctx.task_id}: Running shell command on nodes #{nodes_uids.inspect} finished " \
                            "with errors: #{answer['error_nodes'].inspect}"
      end
      Astute.logger.info "#{ctx.task_id}: Finished running shell command: #{nodes_uids.inspect}"

      answer
    end

    # Internal helpers.
    #
    # A bare `private` does not affect methods defined with `def self.`, so
    # these helpers have always been publicly callable. They are intentionally
    # left that way to stay compatible with existing callers.

    # Convert a list of slave names into the report format.
    #
    # slave_names - Array of 'slave_name' values
    # nodes       - the full node list to look the names up in
    #
    # Returns an Array of {'uid' => uid} Hashes, in +nodes+ order; nodes
    # without a 'uid' are left out.
    def self.to_report_format(slave_names, nodes)
      matching = nodes.select { |n| slave_names.include?(n['slave_name']) }
      matching.select { |n| n['uid'] }.map { |n| {'uid' => n['uid']} }
    end

    # Run +cmd+ once on every host in +nodes+ (Array of host names).
    #
    # timeout - seconds the whole multi-session loop may take
    #
    # Returns [executed_hosts, error_hosts, inaccessible_hosts].
    def self.run_remote_command(nodes, cmd, timeout)
      servers = []

      Net::SSH::Multi.start(:concurrent_connections => Astute.config.MAX_NODES_PER_CALL,
                            :on_error => :warn) do |session|
        nodes.each do |name|
          session.use name,
                      :user => 'root',
                      :host_key => 'ssh-rsa',
                      :keys => ['/root/.ssh/id_rsa']
        end
        servers = session.servers_for

        # execute commands on all servers
        # FIXME: debug output is not shown when the command consists of
        # several lines
        session.exec cmd do |ch, stream, data|
          Astute.logger.debug "[#{ch[:host]} : #{stream}] #{data}"
        end

        Timeout::timeout(timeout) { session.loop }
      end

      detect_status(servers)
    rescue Timeout::Error
      Astute.logger.debug "SSH session is closed due to the achievement of a timeout"
      # `servers` is always an Array, so it has to be tested for emptiness:
      # when no server was registered before the timeout, every requested
      # node is reported as inaccessible.
      return [[], [], nodes] if servers.empty?

      servers.each do |s|
        # NOTE(review): fail! only runs when shutdown! returns a truthy value;
        # confirm against net-ssh that this is what is intended.
        s.session.shutdown! && s.fail! if s.busy?
      end
      detect_status(servers)
    end

    # Split servers into executed and inaccessible hosts by Server#failed?.
    #
    # Returns [executed_hosts, [], inaccessible_hosts]; the middle (error)
    # list is always empty for now.
    # TODO: support exit code from shell command
    def self.detect_status(servers)
      failed, executed = servers.partition { |s| s.failed? }
      [executed.map { |s| s.host }, [], failed.map { |s| s.host }]
    end

  end
end
|
|
@ -0,0 +1,67 @@
|
|||
# Copyright 2013 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
module Astute
  # Supplies the shell script that freezes a running OS installer on a node
  # and wipes the boot areas of its non-removable disks.
  class SshEraseNodes

    # Build the erase script.
    #
    # The script:
    #   * sends SIGSTOP to anaconda, debootstrap and dpkg;
    #   * sets kernel.panic to 5 and enables sysrq;
    #   * for every /sys/block device whose major number is listed in
    #     storages_codes and which is not removable, zeroes the first 1M and
    #     the last 512-byte sector (/sys/block/<dev>/size is counted in
    #     512-byte sectors, so the last sector is size - 1).
    #
    # The heredoc is quoted ('ERASE_COMMAND') on purpose: in an unquoted
    # heredoc Ruby would turn the "\b" word boundaries of the grep pattern
    # into backspace characters and no device major would ever match.
    #
    # erase_data receives its arguments in the order printed by its echo line
    # (dev, length, offset, bs) and maps them to dd as count=length,
    # seek=offset, bs=bs.
    #
    # Returns the script as a String.
    def self.command
      <<-'ERASE_COMMAND'
        killall -STOP anaconda
        killall -STOP debootstrap dpkg
        echo "5" > /proc/sys/kernel/panic
        echo "1" > /proc/sys/kernel/sysrq

        storages_codes="3, 8, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 252, 253"

        reboot_with_sleep() {
          sleep 5
          echo "1" > /proc/sys/kernel/panic_on_oops
          echo "10" > /proc/sys/kernel/panic
          echo "b" > /proc/sysrq-trigger
        }

        erase_data() {
          echo "Run erase_node with dev= $1 length = $2 offset = $3 bs = $4"
          dd if=/dev/zero of=/dev/$1 bs=$4 count=$2 seek=$3 oflag=direct
        }

        erase_boot_devices() {
          for d in /sys/block/*
          do
            basename_dir=$(basename $d)
            major_raw=$(udevadm info --query=property --name=$basename_dir | grep MAJOR | sed 's/ *$//g')
            major=$(echo ${major_raw##*=})

            echo $storages_codes | grep -o "\b$major\b"
            if [ $? -ne 0 ]; then continue; fi

            removable=$(grep -o '[[:digit:]]' /sys/block/$basename_dir/removable)
            if [ $removable -ne 0 ]; then continue; fi

            size=$(cat /sys/block/$basename_dir/size)

            erase_data $basename_dir 1 0 '1M'
            erase_data $basename_dir 1 $(($size - 1)) '512'
          done
        }

        echo "Run erase node command"
        erase_boot_devices

        # Avoid shell hang using nohup and stdout/stderr redirections
        # nohup reboot_with_sleep > /dev/null 2>&1 &
      ERASE_COMMAND
    end
  end
end
|
|
@ -0,0 +1,27 @@
|
|||
# Copyright 2013 Mirantis, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
module Astute
  # Supplies the shell snippet used to force a node reboot through the
  # kernel sysrq trigger.
  class SshHardReboot

    # Build the reboot snippet.
    #
    # The snippet prints a notice, writes "1" to kernel.panic_on_oops and
    # "10" to kernel.panic, then writes "b" to /proc/sysrq-trigger.
    #
    # Returns the snippet as a String, one shell statement per line.
    def self.command
      steps = [
        %q(echo "Run node rebooting command using 'SB' to sysrq-trigger"),
        %q(echo "1" > /proc/sys/kernel/panic_on_oops),
        %q(echo "10" > /proc/sys/kernel/panic),
        %q(echo "b" > /proc/sysrq-trigger)
      ]
      steps.map { |step| "#{step}\n" }.join
    end
  end
end
|
Loading…
Reference in New Issue