From a46d5c5a2099eeeddad0c0ceaa931579f0872a3f Mon Sep 17 00:00:00 2001 From: John Fulton Date: Fri, 16 Sep 2016 14:29:25 -0400 Subject: [PATCH] Deployment should fail when trying to add another Ceph cluster's OSD This change explicitly adds the FSID to the $cluster_option variable and causes Puppet to exit if OSD preparation/activation will fail because the OSD belongs to a different Ceph cluster as determined by an FSID mismatch. FSID mismatch is a symptom of attempting to install over another deploy. The FSID mismatch failure will be logged so that the user may determine the reason for failure and then choose to zap away the old deploy before re-attempting deployment. Closes-Bug: 1604728 Change-Id: I61d18400754842860372c4cc5f3b80d104d59706 --- manifests/osd.pp | 27 +++++++++++++++++-- ...-check-fsid-mismatch-a5cb615be1b4e40f.yaml | 7 +++++ spec/defines/ceph_osd_spec.rb | 16 +++++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/osd-check-fsid-mismatch-a5cb615be1b4e40f.yaml diff --git a/manifests/osd.pp b/manifests/osd.pp index 4a931e47..6c39806d 100644 --- a/manifests/osd.pp +++ b/manifests/osd.pp @@ -45,12 +45,16 @@ # on the directory backing the OSD service. # Optional. Defaults to 'ceph_var_lib_t' # +# [*fsid*] The ceph cluster FSID +# Optional. 
Defaults to $::ceph::profile::params::fsid +# define ceph::osd ( $ensure = present, $journal = undef, $cluster = undef, $exec_timeout = $::ceph::params::exec_timeout, $selinux_file_context = 'ceph_var_lib_t', + $fsid = $::ceph::profile::params::fsid, ) { $data = $name @@ -88,6 +92,26 @@ test -f ${udev_rules_file} && test \$DISABLE_UDEV -eq 1 logoutput => true, } + if $fsid { + $fsid_option = "--cluster-uuid ${fsid}" + $ceph_check_fsid_mismatch = "ceph-osd-check-fsid-mismatch-${name}" + Exec[$ceph_check_udev] -> Exec[$ceph_check_fsid_mismatch] + Exec[$ceph_check_fsid_mismatch] -> Exec[$ceph_prepare] + # return error if ${data} has fsid differing from ${fsid}, unless there is no fsid + exec { $ceph_check_fsid_mismatch: + command => "/bin/true # comment to satisfy puppet syntax requirements +set -ex +test ${fsid} = \$(ceph-disk list ${data} | egrep -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}') +", + unless => "/bin/true # comment to satisfy puppet syntax requirements +set -ex +test -z \$(ceph-disk list ${data} | egrep -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}') +", + logoutput => true, + timeout => $exec_timeout, + } + } + Exec[$ceph_check_udev] -> Exec[$ceph_prepare] # ceph-disk: prepare should be idempotent http://tracker.ceph.com/issues/7475 exec { $ceph_prepare: @@ -99,7 +123,7 @@ if ! test -b ${data} ; then chown -h ceph:ceph ${data} fi fi -ceph-disk prepare ${cluster_option} ${data} ${journal} +ceph-disk prepare ${cluster_option} ${fsid_option} ${data} ${journal} udevadm settle ", unless => "/bin/true # comment to satisfy puppet syntax requirements @@ -110,7 +134,6 @@ ceph-disk list | grep -E ' *${data}1? 
.*ceph data, (prepared|active)' || logoutput => true, timeout => $exec_timeout, } - if (str2bool($::selinux) == true) { ensure_packages($::ceph::params::pkg_policycoreutils, {'ensure' => 'present'}) exec { "fcontext_${name}": diff --git a/releasenotes/notes/osd-check-fsid-mismatch-a5cb615be1b4e40f.yaml b/releasenotes/notes/osd-check-fsid-mismatch-a5cb615be1b4e40f.yaml new file mode 100644 index 00000000..7f06bfff --- /dev/null +++ b/releasenotes/notes/osd-check-fsid-mismatch-a5cb615be1b4e40f.yaml @@ -0,0 +1,7 @@ +--- +features: + - The ceph cluster FSID is explicitly added as a cluster option (--cluster-uuid) to ceph-disk prepare per OSD + - An additional check is done prior to preparing an OSD to verify the OSD is not already prepared with a different FSID, which is a symptom of trying to add an OSD from a different ceph cluster + - Prior to this change, a deploy might report it is successful even if all of the OSDs fail to activate. The logs will now indicate that the OSD activation failed because a different FSID was found so that the user may then choose to zap away the old deploy +fixes: + - Bug 1604728 Puppet should exit with error if disk activate fails diff --git a/spec/defines/ceph_osd_spec.rb b/spec/defines/ceph_osd_spec.rb index 9ff8b8a4..4baabc92 100644 --- a/spec/defines/ceph_osd_spec.rb +++ b/spec/defines/ceph_osd_spec.rb @@ -51,7 +51,7 @@ if ! 
test -b /srv ; then chown -h ceph:ceph /srv fi fi -ceph-disk prepare /srv +ceph-disk prepare /srv udevadm settle ", 'unless' => "/bin/true # comment to satisfy puppet syntax requirements @@ -96,6 +96,7 @@ ls -ld /var/lib/ceph/osd/ceph-* | grep ' /srv\$' { :cluster => 'testcluster', :journal => '/srv/journal', + :fsid => 'f39ace04-f967-4c3d-9fd2-32af2d2d2cd5', } end @@ -112,6 +113,17 @@ test -f /usr/lib/udev/rules.d/95-ceph-osd.rules && test \$DISABLE_UDEV -eq 1 ", 'logoutput' => true, ) } + it { is_expected.to contain_exec('ceph-osd-check-fsid-mismatch-/srv/data').with( + 'command' => "/bin/true # comment to satisfy puppet syntax requirements +set -ex +test f39ace04-f967-4c3d-9fd2-32af2d2d2cd5 = \$(ceph-disk list /srv/data | egrep -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}') +", + 'unless' => "/bin/true # comment to satisfy puppet syntax requirements +set -ex +test -z \$(ceph-disk list /srv/data | egrep -o '[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}') +", + 'logoutput' => true + ) } it { is_expected.to contain_exec('ceph-osd-prepare-/srv/data').with( 'command' => "/bin/true # comment to satisfy puppet syntax requirements set -ex @@ -121,7 +133,7 @@ if ! test -b /srv/data ; then chown -h ceph:ceph /srv/data fi fi -ceph-disk prepare --cluster testcluster /srv/data /srv/journal +ceph-disk prepare --cluster testcluster --cluster-uuid f39ace04-f967-4c3d-9fd2-32af2d2d2cd5 /srv/data /srv/journal udevadm settle ", 'unless' => "/bin/true # comment to satisfy puppet syntax requirements