Make sure that ceph-nfs and the VIP start in the right order
Currently there is a race where ceph-nfs is free to start before
the colocation constraint with the VIP is created. This issue is
exacerbated by the fact that the container being started (via a systemd
unit file managed by pacemaker) never fails when the daemon
encounters an error, because its startup script contains:
/usr/bin/ganesha.nfsd "${GANESHA_OPTIONS[@]}" -L /var/log/ganesha/ganesha.log "${GANESHA_EPOCH}" || return 0
Since the container never fails, docker never exits, so systemd and
pacemaker think everything is fine and the service won't get rescheduled
on another node.
We can avoid this issue completely if we guarantee that
the VIP and the ceph-nfs service start on the bootstrap node only
(by tagging only that node with the right property), and only later
add the node property on all the other nodes.
The drawback of this approach is that, barring any failures, ceph-nfs
will always be started on the master node for that service.
Change-Id: I78c16c9fb28211a7a8ec2187ae76b9a072b76ea2
(cherry picked from commit 318e20a828)
This commit is contained in:
parent
501f10c485
commit
6805fcad88
@ -64,13 +64,18 @@ class tripleo::profile::pacemaker::ceph_nfs (
|
||||
}
|
||||
|
||||
if $step >= 2 {
|
||||
pacemaker::property { 'ceph-nfs-role-node-property':
|
||||
property => 'ceph-nfs-role',
|
||||
value => true,
|
||||
tries => $pcs_tries,
|
||||
node => $::hostname,
|
||||
}
|
||||
if $pacemaker_master {
|
||||
# At step2 we only create the node property on master so that
|
||||
# both VIP and (later at step5) ceph-nfs service can start on master
|
||||
# node only. This way we can guarantee that the VIP and ceph-nfs are
|
||||
# colocated. Later we expand the properties on all nodes where ceph_nfs
|
||||
# is supposed to run.
|
||||
pacemaker::property { 'ceph-nfs-role-node-property':
|
||||
property => 'ceph-nfs-role',
|
||||
value => true,
|
||||
tries => $pcs_tries,
|
||||
node => $::hostname,
|
||||
}
|
||||
pacemaker::resource::ip { 'ganesha_vip':
|
||||
ip_address => $ganesha_vip,
|
||||
cidr_netmask => $netmask,
|
||||
@ -116,9 +121,21 @@ class tripleo::profile::pacemaker::ceph_nfs (
|
||||
tag => 'pacemaker_constraint',
|
||||
}
|
||||
|
||||
# See comment on pacemaker::property at step2
|
||||
$ceph_nfs_short_node_names = hiera('ceph_nfs_short_node_names')
|
||||
$ceph_nfs_short_node_names.each |String $node_name| {
|
||||
pacemaker::property { "ceph-nfs-role-${node_name}":
|
||||
property => 'ceph-nfs-role',
|
||||
value => true,
|
||||
tries => $pcs_tries,
|
||||
node => $node_name,
|
||||
}
|
||||
}
|
||||
|
||||
Pacemaker::Resource::Ip['ganesha_vip']
|
||||
-> Pacemaker::Resource::Service['ceph-nfs']
|
||||
-> Pacemaker::Constraint::Order['ganesha_vip-then-ganesha']
|
||||
-> Pacemaker::Constraint::Colocation['ganesha_vip-with-ganesha']
|
||||
-> Pacemaker::Property<||>
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user