Browse Source

Merge "Fix neutron-ha-tool for active/passive usage"

Jenkins 3 years ago
parent
commit
734f5f4e60
1 changed files with 35 additions and 1 deletions
  1. 35
    1
      ocf/neutron-ha-tool

+ 35
- 1
ocf/neutron-ha-tool View File

@@ -199,6 +199,26 @@ neutron_ha_tool_status() {
199 199
 }
200 200
 
201 201
 neutron_ha_tool_monitor() {
202
+    if ! [ -e "$statefile" ]; then
203
+        # neutron-ha-tool is run on a single node at a time, i.e. in
204
+        # active/passive mode.  So we use this state file to keep
205
+        # track of whether it's active on the current node, and if
206
+        # Pacemaker does a probe on a node where it's not active, we
207
+        # skip the l3-agent check and always return OCF_NOT_RUNNING,
208
+        # otherwise we'd get messages from pengine like:
209
+        #
210
+        #   error: Resource neutron-ha-tool (ocf::neutron-ha-tool) is active on
211
+        #       2 nodes attempting recovery
212
+        #   warning: See http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active
213
+        #       for more information.
214
+        #
215
+        # and Pacemaker could attempt unnecessary recovery according to the
216
+        # value of the cluster-wide "multiple-active" option.
217
+        ocf_log debug "neutron-ha-tool not currently active on this node; " \
218
+            "skipping l3-agent check"
219
+        return $OCF_NOT_RUNNING
220
+    fi
221
+
202 222
     INSECURE=""
203 223
     if ocf_is_true $OCF_RESKEY_os_insecure; then
204 224
         INSECURE="--insecure"
@@ -217,6 +237,12 @@ neutron_ha_tool_monitor() {
217 237
 }
218 238
 
219 239
 neutron_ha_tool_start() {
240
+    touch "$statefile"
241
+    if ! [ -e "$statefile" ]; then
242
+        ocf_log err "Failed to create $statefile - aborting!"
243
+        return $OCF_ERR_GENERIC
244
+    fi
245
+
220 246
     INSECURE=""
221 247
     if ocf_is_true $OCF_RESKEY_os_insecure; then
222 248
         INSECURE="--insecure"
@@ -245,7 +271,13 @@ neutron_ha_tool_start() {
245 271
 }
246 272
 
247 273
 neutron_ha_tool_stop() {
248
-    # This is a noop
274
+    rm -f "$statefile"
275
+    if [ -e "$statefile" ]; then
276
+        ocf_log err "Uh-oh - failed to remove $statefile!"
277
+        # If we can't even remove a file in tmpfs (/run), something
278
+        # is *really* badly wrong, so fence the node.
279
+        return $OCF_ERR_GENERIC
280
+    fi
249 281
     return $OCF_SUCCESS
250 282
 }
251 283
 
@@ -275,6 +307,8 @@ if [ -n "$OCF_RESKEY_os_cacert" ]; then
275 307
     export OS_CACERT=$OCF_RESKEY_os_cacert
276 308
 fi
277 309
 
310
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
311
+
278 312
 # What kind of method was invoked?
279 313
 case "$1" in
280 314
     start)

Loading…
Cancel
Save