Only fail requests if no cloud can service them
Previously, if we hit cloud errors while attempting to handle a request in a particular cloud, we treated that node request as failed even if other clouds had not yet attempted to fulfill it. This is problematic because clouds have outages, and that is exactly why nodepool supports speaking to more than one cloud at a time. We fix this by marking the request as failed only if all other providers have already attempted to fulfill it. Otherwise, we put it back in the requested state and let the other providers have a go.

Change-Id: I519cdd2401f77cad97d60329e207dddafc8cd5a4
This commit is contained in:
@@ -134,6 +134,17 @@ class NodeRequestHandler(object):
|
||||
|
||||
self.nodeset = []
|
||||
|
||||
def decline_request(self):
    '''
    Decline the node request on behalf of this launcher.

    Appends this launcher's id to the request's ``declined_by`` list and
    then picks the next request state: ``zk.FAILED`` once every launcher
    returned by ``self.zk.getRegisteredLaunchers()`` appears in
    ``declined_by``, otherwise ``zk.REQUESTED``.

    Only the in-memory request object is modified here; this method does
    not store or unlock the request.
    '''
    self.request.declined_by.append(self.launcher_id)

    registered = set(self.zk.getRegisteredLaunchers())
    not_yet_declined = registered - set(self.request.declined_by)

    if not_yet_declined:
        # At least one registered launcher has not declined yet, so put
        # the request back in the requested state.
        self.request.state = zk.REQUESTED
        return

    # All launchers have declined it
    self.log.debug("Failing declined node request %s", self.request.id)
    self.request.state = zk.FAILED
|
||||
|
||||
def run(self):
|
||||
'''
|
||||
Execute node request handling.
|
||||
@@ -147,10 +158,10 @@ class NodeRequestHandler(object):
|
||||
self.run_handler()
|
||||
except Exception:
|
||||
self.log.exception(
|
||||
"Exception in NodeRequestHandler for request %s:",
|
||||
self.request.id)
|
||||
"Declining node request %s due to exception in "
|
||||
"NodeRequestHandler:", self.request.id)
|
||||
self.decline_request()
|
||||
self.unlockNodeSet(clear_allocation=True)
|
||||
self.request.state = zk.FAILED
|
||||
self.zk.storeNodeRequest(self.request)
|
||||
self.zk.unlockNodeRequest(self.request)
|
||||
self.done = True
|
||||
@@ -196,15 +207,7 @@ class NodeRequestHandler(object):
|
||||
if self.launch_manager.failed_nodes:
|
||||
self.log.debug("Declining node request %s because nodes failed",
|
||||
self.request.id)
|
||||
self.request.declined_by.append(self.launcher_id)
|
||||
launchers = set(self.zk.getRegisteredLaunchers())
|
||||
if launchers.issubset(set(self.request.declined_by)):
|
||||
# All launchers have declined it
|
||||
self.log.debug("Failing declined node request %s",
|
||||
self.request.id)
|
||||
self.request.state = zk.FAILED
|
||||
else:
|
||||
self.request.state = zk.REQUESTED
|
||||
self.decline_request()
|
||||
else:
|
||||
# The assigned nodes must be added to the request in the order
|
||||
# in which they were requested.
|
||||
|
||||
@@ -563,22 +563,13 @@ class OpenStackNodeRequestHandler(NodeRequestHandler):
|
||||
if declined_reasons:
|
||||
self.log.debug("Declining node request %s because %s",
|
||||
self.request.id, ', '.join(declined_reasons))
|
||||
self.request.declined_by.append(self.launcher_id)
|
||||
launchers = set(self.zk.getRegisteredLaunchers())
|
||||
if launchers.issubset(set(self.request.declined_by)):
|
||||
self.log.debug("Failing declined node request %s",
|
||||
self.request.id)
|
||||
# All launchers have declined it
|
||||
self.request.state = zk.FAILED
|
||||
self.decline_request()
|
||||
self.unlockNodeSet(clear_allocation=True)
|
||||
|
||||
# If conditions have changed for a paused request to now cause us
|
||||
# to decline it, we need to unpause so we don't keep trying it
|
||||
if self.paused:
|
||||
self.paused = False
|
||||
# If we didn't mark the request as failed above, reset it.
|
||||
if self.request.state != zk.FAILED:
|
||||
self.request.state = zk.REQUESTED
|
||||
|
||||
self.zk.storeNodeRequest(self.request)
|
||||
self.zk.unlockNodeRequest(self.request)
|
||||
|
||||
Reference in New Issue
Block a user