Fix intermittent problem with OSDs being down

* Add OSD activation and check that the osd process for each OSD has started. * Change the process of adding OSDs to the cluster: Ceph OSDs are now added one by one instead of all together, which makes it possible to check the status of every OSD as it is added to the cluster. Change-Id: I8e64c1b15ed92e6fb5939b6f41728efacae64319 Closes-bug: #1419884
2015-03-31 09:07:18 +00:00 · 2015-03-31 09:07:18 +00:00 · c20b2f8463
commit c20b2f8463
parent 80e358d511
4 changed files with 46 additions and 33 deletions
--- a/deployment/puppet/ceph/manifests/init.pp
+++ b/deployment/puppet/ceph/manifests/init.pp
@ -132,8 +132,8 @@ class ceph (

    'ceph-osd': {
      if ! empty($osd_devices) {
-        include ceph::osd
-        Class['ceph::conf'] -> Class['ceph::osd'] ~> Service['ceph']
+        include ceph::osds
+        Class['ceph::conf'] -> Class['ceph::osds'] ~> Service['ceph']
      }
    }

--- a/deployment/puppet/ceph/manifests/osd.pp
+++ b/deployment/puppet/ceph/manifests/osd.pp
@ -1,31 +0,0 @@
-# prepare and bring online the devices listed in $::ceph::osd_devices
-class ceph::osd (
-  $devices = join(prefix($::ceph::osd_devices, "${::hostname}:"), ' '),
-){
-  firewall {'011 ceph-osd allow':
-    chain   => 'INPUT',
-    dport   => '6800-7100',
-    proto   => 'tcp',
-    action  => accept,
-  }
-
-  exec { 'ceph-deploy osd prepare':
-    # ceph-deploy osd prepare is ensuring there is a filesystem on the
-    # disk according to the args passed to ceph.conf (above).
-    #
-    # It has a long timeout because of the format taking forever. A
-    # resonable amount of time would be around 300 times the length of
-    # $osd_nodes. Right now its 0 to prevent puppet from aborting it.
-
-    command   => "ceph-deploy osd prepare ${devices}",
-    returns   => 0,
-    timeout   => 0, # TODO: make this something reasonable
-    tries     => 2, # This is necessary because of race for mon creating keys
-    try_sleep => 1,
-    logoutput => true,
-    unless    => "grep -q '^${ $::ceph::osd_devices[0] }' /proc/mounts",
-  }
-
-  Firewall['011 ceph-osd allow'] ->
-  Exec['ceph-deploy osd prepare']
-}
--- a/deployment/puppet/ceph/manifests/osds.pp
+++ b/deployment/puppet/ceph/manifests/osds.pp
@ -0,0 +1,15 @@
+# Prepare and bring online the devices listed in $devices (defaults to
+# $::ceph::osd_devices): one ceph::osds::osd resource is declared per
+# entry, after the firewall rule for tcp ports 6800-7100 is in place.
+class ceph::osds (
+  $devices = $::ceph::osd_devices,
+) {
+  firewall { '011 ceph-osd allow':
+    chain  => 'INPUT',
+    dport  => '6800-7100',
+    proto  => 'tcp',
+    action => accept,
+  }
+  ceph::osds::osd { $devices: }
+  Firewall['011 ceph-osd allow'] -> Ceph::Osds::Osd[$devices]
+}
--- a/deployment/puppet/ceph/manifests/osds/osd.pp
+++ b/deployment/puppet/ceph/manifests/osds/osd.pp
@ -0,0 +1,29 @@
+# Prepare a single OSD device with ceph-deploy and then activate it.
+# The resource title ($name) is the device; it is looked up in /proc/mounts.
+define ceph::osds::osd () {
+  $deploy_device_name = "${::hostname}:${name}"
+
+  exec { "ceph-deploy osd prepare ${deploy_device_name}":
+    # ceph-deploy osd prepare ensures there is a filesystem on the disk,
+    # according to the args passed to ceph.conf. Formatting can take very
+    # long (original estimate: ~300 x the length of $osd_nodes), so the
+    # timeout is 0 for now to prevent puppet from aborting it.
+    command   => "ceph-deploy osd prepare ${deploy_device_name}",
+    returns   => 0,
+    timeout   => 0, # TODO: make this something reasonable
+    tries     => 2, # This is necessary because of race for mon creating keys
+    try_sleep => 1,
+    logoutput => true,
+    # Skip when already mounted; '^' ties the match to the device column.
+    unless    => "grep -q '^${name}' /proc/mounts",
+  } ->
+  exec { "ceph-deploy osd activate ${deploy_device_name}":
+    command   => "ceph-deploy osd activate ${deploy_device_name}",
+    try_sleep => 10,
+    tries     => 6,
+    logoutput => true,
+    # Skip when the OSD whose id is taken from this device's ceph-<id>
+    # mount entry is already listed as 'up' and 'in' by 'ceph osd dump'.
+    unless    => "ceph osd dump | grep -q \"osd.$(sed -nEe 's|${name} .*ceph-([0-9]+).*$|\\1|p' /proc/mounts) up .* in \"",
+  }
+}