Merge "Apply NoExecute taint to locked nodes"
commit 97524fee3a
@@ -893,34 +893,21 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
         future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
 
         if self._host_supports_kubernetes(host_personality):
-            if True:
-                # For now, we do not want to apply the NoExecute taint.
-                # When the VIM detects that a service is failed on a host,
-                # it goes through a disable/enable cycle. This would cause
-                # the NoExecute taint to be applied/removed which causes
-                # most pods to be stopped/started. If the pods don't come
-                # back quickly enough the VIM will attempt another
-                # disable/enable, which can go on forever. For now,
-                # we will just avoid tainting hosts.
-                # TODO(bwensley): Rework when support for pure k8s hosts is
-                # added.
-                pass
-            else:
-                response['reason'] = 'failed to disable kubernetes services'
-
-                # To disable kubernetes we add the NoExecute taint to the
-                # node. This removes pods that can be scheduled elsewhere
-                # and prevents new pods from scheduling on the node.
-                future.work(kubernetes_client.taint_node,
-                            host_name, "NoExecute", "services", "disabled")
-
-                future.result = (yield)
-
-                if not future.result.is_complete():
-                    DLOG.error("Kubernetes taint_node failed, operation "
-                               "did not complete, host_uuid=%s, host_name=%s."
-                               % (host_uuid, host_name))
-                    return
+            response['reason'] = 'failed to disable kubernetes services'
+
+            # To disable kubernetes we add the NoExecute taint to the
+            # node. This removes pods that can be scheduled elsewhere
+            # and prevents new pods from scheduling on the node.
+            future.work(kubernetes_client.taint_node,
+                        host_name, "NoExecute", "services", "disabled")
+
+            future.result = (yield)
+
+            if not future.result.is_complete():
+                DLOG.error("Kubernetes taint_node failed, operation "
+                           "did not complete, host_uuid=%s, host_name=%s."
+                           % (host_uuid, host_name))
+                return
 
         response['completed'] = True
         response['reason'] = ''
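For context, the taint that kubernetes_client.taint_node applies corresponds to a standard Kubernetes node taint with key "services", value "disabled" and effect "NoExecute". The snippet below is a minimal sketch of roughly what that amounts to using the upstream kubernetes Python client; the helper name and the naive handling of existing taints are assumptions, not the VIM plugin's actual implementation.

# Sketch only: approximates kubernetes_client.taint_node(host_name,
# "NoExecute", "services", "disabled") with the upstream client. A real
# implementation would merge with any taints already on the node instead
# of overwriting the list.
from kubernetes import client, config

def taint_node_sketch(node_name):
    config.load_kube_config()  # or config.load_incluster_config() inside a pod
    core_v1 = client.CoreV1Api()
    body = {
        'spec': {
            'taints': [
                {'key': 'services', 'value': 'disabled', 'effect': 'NoExecute'}
            ]
        }
    }
    # NoExecute evicts pods that do not tolerate the taint and prevents
    # new pods from being scheduled on the node.
    core_v1.patch_node(node_name, body)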
@@ -669,49 +669,45 @@ def query_network_agents(token, host_name, check_fully_up):
    Input parameter check_fully_up set to True will check for
    both alive and admin_state_up, otherwise only alive is checked.
    """
    try:
        url, api_cmd, api_cmd_headers, result_data = get_network_agents(
            token, host_name)

        agent_state = 'up'
        alive = False
        admin_state_up = False
        supported_agents = [AGENT_TYPE.L3, AGENT_TYPE.DHCP]
        for supported_agent in supported_agents:
            found = False
            for agent in result_data:
                agent_type = agent.get('agent_type', '')
                host = agent.get('host', '')
                if (agent_type == supported_agent) and (host == host_name):
                    DLOG.verbose("found agent %s for host %s" %
                                 (supported_agent, host_name))
                    alive = agent.get('alive', False)
                    admin_state_up = agent.get('admin_state_up', False)
                    # found the agent of interest.
                    found = True
                    break
            if found:
                if check_fully_up:
                    if not (alive and admin_state_up):
                        DLOG.verbose("host %s agent %s not fully up. alive: %s,"
                                     " admin_state_up: %s" %
                                     (host_name, supported_agent,
                                      alive, admin_state_up))
                        agent_state = 'down'
                        break
                else:
                    if not alive:
                        DLOG.verbose("host %s agent %s not alive" %
                                     (host_name, supported_agent))
                        agent_state = 'down'
                        break
            else:
                DLOG.error("host %s agent %s not present" %
                           (host_name, supported_agent))
                agent_state = 'down'
                break

    except Exception as e:
        DLOG.exception("Caught exception trying to query host %s "
                       "agent states: %s" % (host_name, e))
        agent_state = 'down'

    return agent_state
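The agent-state evaluation above can be exercised in isolation. The snippet below is a standalone sketch of the same decision run against made-up agent records; the literal agent-type strings stand in for the plugin's AGENT_TYPE constants and the function name is hypothetical.

# Sketch only: same alive/admin_state_up decision as query_network_agents,
# minus the REST query and DLOG logging. Sample data is invented.
SAMPLE_AGENTS = [
    {'agent_type': 'L3 agent', 'host': 'compute-0',
     'alive': True, 'admin_state_up': False},
    {'agent_type': 'DHCP agent', 'host': 'compute-0',
     'alive': True, 'admin_state_up': True},
]

def agents_state(result_data, host_name, check_fully_up,
                 supported_agents=('L3 agent', 'DHCP agent')):
    for supported_agent in supported_agents:
        match = next((a for a in result_data
                      if a.get('agent_type') == supported_agent and
                      a.get('host') == host_name), None)
        if match is None:
            return 'down'      # a required agent is not present on the host
        if check_fully_up:
            if not (match.get('alive') and match.get('admin_state_up')):
                return 'down'  # agent present but not fully up
        elif not match.get('alive'):
            return 'down'      # agent present but not alive
    return 'up'

print(agents_state(SAMPLE_AGENTS, 'compute-0', check_fully_up=True))   # down
print(agents_state(SAMPLE_AGENTS, 'compute-0', check_fully_up=False))  # up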
@@ -38,6 +38,13 @@ class SwMgmtDirector(object):
         """
         return self._sw_update
 
+    @property
+    def single_controller(self):
+        """
+        Returns whether this is a single controller configuration
+        """
+        return self._single_controller
+
     def create_sw_patch_strategy(self, controller_apply_type, storage_apply_type,
                                  swift_apply_type, worker_apply_type,
                                  max_parallel_worker_hosts,
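single_controller follows the same read-only property pattern as the existing sw_update accessor: the flag is stored on the director and exposed without a setter. A minimal sketch of that shape is below; the constructor argument is an assumption, since how the real SwMgmtDirector records the flag is not shown in this hunk.

# Sketch only: read-only property pattern used above.
class _DirectorSketch(object):
    def __init__(self, single_controller):
        self._single_controller = single_controller

    @property
    def single_controller(self):
        # No setter: callers can read the deployment shape but not change it.
        return self._single_controller

print(_DirectorSketch(single_controller=True).single_controller)  # True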
@@ -234,9 +234,6 @@ class DisableHostTask(state_machine.StateTask):
         if host.host_service_configured(objects.HOST_SERVICES.GUEST):
             task_work_list.append(DisableHostServicesTaskWork(
                 self, host, objects.HOST_SERVICES.GUEST))
-        if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
-            task_work_list.append(DisableHostServicesTaskWork(
-                self, host, objects.HOST_SERVICES.CONTAINER))
         if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
             task_work_list.append(QueryHypervisorTaskWork(
                 self, host, force_pass=True))
@@ -248,6 +245,17 @@
         task_work_list.append(NotifyHostDisabledTaskWork(
             self, host, objects.HOST_SERVICES.NETWORK))
         task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host))
+        if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
+            # Only disable the container services if the host is being locked
+            # and we are not running in a single controller configuration. In
+            # a single controller configuration we keep the container services
+            # running.
+            if self._host.is_locking():
+                from nfv_vim import directors
+                sw_mgmt_director = directors.get_sw_mgmt_director()
+                if not sw_mgmt_director.single_controller:
+                    task_work_list.append(DisableHostServicesTaskWork(
+                        self, host, objects.HOST_SERVICES.CONTAINER))
         task_work_list.append(notify_host_services_task(
             self, host, force_pass=True))
         if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
@@ -443,8 +451,21 @@ class NotifyDisabledHostTask(state_machine.StateTask):
     Notify Disabled Host Task
     """
     def __init__(self, host):
+        from nfv_vim import objects
+
         self._host_reference = weakref.ref(host)
         task_work_list = list()
+        if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
+            # Only disable the container services if the host is being locked
+            # and we are not running in a single controller configuration. In
+            # a single controller configuration we keep the container services
+            # running.
+            if self._host.is_locking():
+                from nfv_vim import directors
+                sw_mgmt_director = directors.get_sw_mgmt_director()
+                if not sw_mgmt_director.single_controller:
+                    task_work_list.append(DisableHostServicesTaskWork(
+                        self, host, objects.HOST_SERVICES.CONTAINER))
         task_work_list.append(NotifyHostServicesDisabledTaskWork(
             self, host, force_pass=True))
         super(NotifyDisabledHostTask, self).__init__(
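Taken together, DisableHostTask and NotifyDisabledHostTask now gate the CONTAINER service disable (and hence the NoExecute taint) on the same three conditions. A compact restatement of that predicate, with plain booleans standing in for the real host and director objects:

# Sketch only: the condition both tasks above check before appending
# DisableHostServicesTaskWork for CONTAINER services.
def should_disable_container_services(container_configured, host_is_locking,
                                      single_controller):
    # Containers are only torn down when the host is being locked and a
    # second controller exists to keep hosting the pods; on a single
    # controller configuration the container services are left running.
    return container_configured and host_is_locking and not single_controller

assert should_disable_container_services(True, True, False) is True
assert should_disable_container_services(True, True, True) is False
assert should_disable_container_services(True, False, False) is False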