Prevent deletion of nodes which have running OSD processes
Node deletion will fail if a node with the ceph-osd role still has placement groups (PGs) located on any of its OSDs, because removing too many OSDs at once can result in data loss. Before deleting such nodes, the operator must manually remove those OSDs from the cluster and allow Ceph to rebalance, as described here:
http://ceph.com/docs/master/rados/operations/add-or-rm-osds/#removing-osds-manual

DocImpact
Change-Id: I41173a83a3268455148652680a534e47296af319
Closes-bug: #1424060
parent 5cdd4ae403
commit 1b42ec374a
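For orientation before the diff: the new pre-delete check works on the same node hashes the remove-nodes request already carries and answers with a plain status hash the dispatcher can act on. A minimal sketch of that contract (illustrative only; the concrete values are made up, and only the "roles" and "slave_name" fields are read by the new check):

# Illustrative only -- not part of the patch.
nodes = [
  {"id" => "1", "roles" => ["controller"]},
  {"id" => "2", "roles" => ["compute", "ceph-osd"], "slave_name" => "node-2"}
]

# Astute::PreDelete.check_ceph_osds(ctx, nodes) returns one of:
#   {"status" => "ready"}                                   # safe to proceed with deletion
#   {"status" => "error", "error" => "Ceph data still ..."} # PGs still placed on the node's OSDs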
@@ -42,6 +42,7 @@ require 'astute/deploy_actions'
 require 'astute/nailgun_hooks'
 require 'astute/puppet_task'
 require 'astute/task_manager'
+require 'astute/pre_delete'
 
 ['/astute/pre_deployment_actions/*.rb',
  '/astute/pre_deploy_actions/*.rb',
@@ -106,6 +106,10 @@ module Astute
       Network.multicast_verification(ctx, nodes)
     end
 
+    def check_ceph_osds(reporter, task_id, nodes)
+      PreDelete.check_ceph_osds(Context.new(task_id, reporter), nodes)
+    end
+
     private
 
     def deploy_cluster(up_reporter, task_id, deployment_info, deploy_engine, pre_deployment, post_deployment)
@@ -0,0 +1,70 @@
+# Copyright 2015 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+module Astute
+  module PreDelete
+
+    def self.check_ceph_osds(ctx, nodes)
+      answer = {"status" => "ready"}
+      ceph_nodes = nodes.select { |n| n["roles"].include? "ceph-osd" }
+      ceph_osds = ceph_nodes.collect{ |n| n["slave_name"] }
+      return answer if ceph_osds.empty?
+
+      cmd = "ceph -f json osd tree"
+      shell = MClient.new(ctx, "execute_shell_command", [ceph_nodes[0]["id"]], timeout=60, retries=1)
+      result = shell.execute(:cmd => cmd).first.results
+
+      osds = {}
+      tree = JSON.parse(result[:data][:stdout])
+
+      tree["nodes"].each do |osd|
+        osds[osd["name"]] = osd["children"] if ceph_osds.include? osd["name"]
+      end
+
+      # pg dump lists all pgs in the cluster and where they are located.
+      # $14 is the 'up set' (the list of OSDs responsible for a particular
+      # pg for an epoch) and $16 is the 'acting set' (list of OSDs who
+      # are [or were at some point] responsible for a pg). These sets
+      # will generally be the same.
+      osd_list = osds.values.flatten.join("|")
+      cmd = "ceph pg dump 2>/dev/null | " \
+            "awk '//{print $14, $16}' | " \
+            "egrep -o '\\<(#{osd_list})\\>' | " \
+            "sort -un"
+
+      result = shell.execute(:cmd => cmd).first.results
+      rs = result[:data][:stdout].split("\n")
+
+      # JSON.parse returns the children as integers, so the result from the
+      # shell command needs to be converted for the set operations to work.
+      rs.map! { |x| x.to_i }
+
+      error_nodes = []
+      osds.each do |name, children|
+        error_nodes << name if rs & children != []
+      end
+
+      if not error_nodes.empty?
+        msg = "Ceph data still exists on: #{error_nodes.join(', ')}. " \
+              "You must manually remove the OSDs from the cluster " \
+              "and allow Ceph to rebalance before deleting these nodes."
+        answer = {"status" => "error", "error" => msg}
+      end
+
+      answer
+    end
+
+  end
+end
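The core of the new module is an array intersection: the OSD ids that belong to each node being deleted (the "children" of its entry in `ceph -f json osd tree`) are matched against the OSD ids that still show up in `ceph pg dump`. A standalone sketch of that step, using the sample tree from the spec further down and a made-up pg-dump result:

require 'json'

# Sample `ceph -f json osd tree` output (borrowed from the spec below).
tree = JSON.parse('{"nodes": [{"name": "node-2", "children": [1,2]}]}')

# Map each node slated for deletion to its OSD ids.
osds = {}
tree["nodes"].each { |osd| osds[osd["name"]] = osd["children"] }

# Made-up pg-dump result: OSD ids that still hold placement groups.
rs = [1, 5]

# A node is unsafe to delete if any of its OSDs still appear in pg dump.
error_nodes = osds.select { |_name, children| !(rs & children).empty? }.keys
# => ["node-2"]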
@@ -148,11 +148,15 @@ module Astute
       nodes = data['args']['nodes']
       engine = data['args']['engine']
 
-      result = if nodes.empty?
+      result = @orchestrator.check_ceph_osds(reporter, task_uuid, nodes)
+
+      if result["status"] == "ready"
+        if nodes.empty?
           Astute.logger.debug("#{task_uuid} Node list is empty")
-          nil
+          result = nil
         else
-          @orchestrator.remove_nodes(reporter, task_uuid, engine, nodes)
+          result = @orchestrator.remove_nodes(reporter, task_uuid, engine, nodes)
         end
+      end
 
       report_result(result, reporter)
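The dispatcher change above gates remove_nodes on the check result and otherwise reports the error hash back untouched. A rough simulation of that gate with the orchestrator stubbed out (the class and values here are illustrative, not from the patch):

# Illustrative stand-in for the orchestrator used by the dispatcher above.
class FakeOrchestrator
  def check_ceph_osds(*)
    {"status" => "error", "error" => "Ceph data still exists on: node-2. ..."}
  end

  def remove_nodes(*)
    raise "unreachable: deletion is skipped when the Ceph check fails"
  end
end

orchestrator = FakeOrchestrator.new
result = orchestrator.check_ceph_osds(nil, "task-1", ["node-2"])
result = orchestrator.remove_nodes(nil, "task-1", nil, ["node-2"]) if result["status"] == "ready"
result # => the error hash, which report_result would pass back to the reporter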
@@ -0,0 +1,96 @@
+# Copyright 2015 Mirantis, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+require File.join(File.dirname(__FILE__), '../spec_helper')
+
+describe '#check_ceph_osds' do
+  include SpecHelpers
+
+  let(:ctx) { mock_ctx }
+  let(:success_result) { {"status"=>"ready"} }
+
+  let(:mclient) do
+    mclient = mock_rpcclient
+    Astute::MClient.any_instance.stubs(:rpcclient).returns(mclient)
+    Astute::MClient.any_instance.stubs(:log_result).returns(mclient)
+    Astute::MClient.any_instance.stubs(:check_results_with_retries).returns(mclient)
+    mclient
+  end
+
+  def build_mcresult(stdout="", sender="1")
+    rs = {:sender => sender, :data => {:stdout => stdout}}
+    mcresult_mock = mock_mc_result(rs)
+    mock_result = mock
+    mock_result.stubs(:results).returns(rs)
+    mock_result.stubs(:each).returns(mcresult_mock)
+    [mock_result]
+  end
+
+  context "no ceph-osd nodes" do
+    let(:nodes) { [
+        {"id" => "1", "roles" => ["controller"]},
+        {"id" => "2", "roles" => ["compute"]}
+      ]
+    }
+
+    it "should do nothing if no nodes have ceph-osd role" do
+      expect(Astute::PreDelete.check_ceph_osds(ctx, nodes)).to eq(success_result)
+    end
+  end
+
+  context "nodes with ceph-osd role" do
+    let(:nodes) { [
+        {"id" => "1", "roles" => ["primary-controller"]},
+        {"id" => "2", "roles" => ["compute", "ceph-osd"],
+         "slave_name" => "node-2"}
+      ]
+    }
+    let(:pg_cmd) {
+      cmd = "ceph pg dump 2>/dev/null | " \
+            "awk '//{print $14, $16}' | " \
+            "egrep -o '\\<(1|2)\\>' | " \
+            "sort -un"
+    }
+    let(:osd_cmd) { "ceph -f json osd tree" }
+    let(:json_resp) { '{"nodes": [{"name": "node-2", "children": [1,2]}]}' }
+    let(:error_result) do
+      msg = "Ceph data still exists on: node-2. You must manually " \
+            "remove the OSDs from the cluster and allow Ceph to " \
+            "rebalance before deleting these nodes."
+      {"status" => "error", "error" => msg}
+    end
+
+    it "should raise error if OSDs contain data" do
+      mclient.expects(:execute).with({:cmd => osd_cmd})
+        .returns(build_mcresult(stdout=json_resp))
+
+      mclient.expects(:execute).with({:cmd => pg_cmd})
+        .returns(build_mcresult(stdout="1\n2"))
+
+      expect(Astute::PreDelete.check_ceph_osds(ctx, nodes)).to eq(error_result)
+    end
+
+    it "should succeed with no pgs placed on node" do
+      mclient.expects(:execute).with({:cmd => osd_cmd})
+        .returns(build_mcresult(stdout=json_resp))
+
+      mclient.expects(:execute).with({:cmd => pg_cmd})
+        .returns(build_mcresult(stdout="3\n4"))
+
+      expect(Astute::PreDelete.check_ceph_osds(ctx, nodes)).to eq(success_result)
+    end
+  end
+
+end # describe