Handle REST API timeouts gracefully in the VIM
The VIM is leaking file descriptors (FDs). The problem happens as follows:
- The VIM has worker processes that are used to communicate with other processes through their REST APIs (e.g. sysinv, nova, cinder). The VIM does not specify a timeout when sending REST API requests.
- The VIM does have a timeout for how long a worker process may take to process a request, which can vary depending on the request.
- If the worker process sends a REST API request and does not get a response in time (e.g. because a message is lost or the target process is down), the VIM terminates the worker process. This is done with a call to Process.terminate in the Python multiprocessing library. The docs for this library clearly indicate that Process.terminate should not be used for a process that uses any shared resources (e.g. pipes). In this case, the worker processes are using shared resources (pipes, for one) and these resources are not freed, leading to the FD leak.

The solution is to ensure that a timeout is set when sending REST API requests. This timeout must be less than the worker timeout to ensure that the workers do not time out (and leak FDs) except in the rarest of cases.

Change-Id: Iccff914e86224be96689738cdcc536a4d5acb861
Closes-Bug: 1862049
Signed-off-by: Bart Wensley <barton.wensley@windriver.com>
This commit is contained in:
parent
6817c1cc15
commit
ccd59a0711
|
@ -1 +1 @@
|
|||
TIS_PATCH_VER=79
|
||||
TIS_PATCH_VER=80
|
||||
|
|
|
@ -55,11 +55,26 @@ class TaskFuture(object):
|
|||
del kwargs['timeout_in_secs']
|
||||
|
||||
if timeout_in_secs is None:
|
||||
# WARNING: Any change to the default timeout must be reflected in
|
||||
# the timeouts used for any work being done.
|
||||
timeout_in_secs = 20
|
||||
|
||||
elif 0 >= timeout_in_secs:
|
||||
timeout_in_secs = None # No timeout wanted, wait forever
|
||||
|
||||
# Note about timeouts. When the timeout expires, the VIM will terminate
|
||||
# the worker process doing the work. Unfortunately, the python
|
||||
# multiprocessing library used to manage these processes results in
|
||||
# leaked file descriptors each time a process is terminated. That
|
||||
# means this timeout should be a last resort - the work being done
|
||||
# (e.g. sending a REST API request) must have its own timeout
|
||||
# mechanism to ensure it completes before the worker process times
|
||||
# out. Adding 5 seconds to the configured (or default) timeout to
|
||||
# ensure the underlying timeout mechanism has the opportunity to
|
||||
# abort the work being done.
|
||||
if timeout_in_secs is not None:
|
||||
timeout_in_secs += 5
|
||||
|
||||
if self._scheduler.running_task is not None:
|
||||
task_work = TaskWork(timeout_in_secs, target, *args, **kwargs)
|
||||
self._scheduler.running_task.add_task_work(task_work)
|
||||
|
|
|
@ -115,6 +115,8 @@ max_request_wait_in_secs=45
|
|||
host=127.0.0.1
|
||||
port=30004
|
||||
|
||||
# WARNING: Any changes to these timeouts must be reflected in the timeouts
|
||||
# used for the associated REST API calls.
|
||||
[nfvi-timeouts]
|
||||
openstack.get_token=10
|
||||
neutron.disable_host_services=40
|
||||
|
|
|
@ -204,8 +204,11 @@ def upload_image_data_by_url(token, image_id, image_data_url):
|
|||
operations.append(operation)
|
||||
api_cmd_payload = operations
|
||||
|
||||
# WARNING: Any change to the timeout must be reflected in the config.ini
|
||||
# file for the nfvi plugins.
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=180)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -227,8 +230,10 @@ def upload_image_data_by_file(token, image_id, image_file):
|
|||
file = open(image_file, "rb")
|
||||
api_cmd_payload = file
|
||||
try:
|
||||
# WARNING: Any change to the timeout must be reflected in the config.ini
|
||||
# file for the nfvi plugins.
|
||||
response = rest_api_request(token, "PUT", api_cmd, api_cmd_headers,
|
||||
api_cmd_payload)
|
||||
api_cmd_payload, timeout_in_secs=180)
|
||||
finally:
|
||||
file.close()
|
||||
|
||||
|
|
|
@ -569,7 +569,10 @@ def delete_host_services(token, host_uuid):
|
|||
|
||||
api_cmd_headers = dict()
|
||||
|
||||
response = rest_api_request(token, "DELETE", api_cmd, api_cmd_headers)
|
||||
# WARNING: Any change to the timeout must be reflected in the config.ini
|
||||
# file for the nfvi plugins.
|
||||
response = rest_api_request(token, "DELETE", api_cmd, api_cmd_headers,
|
||||
timeout_in_secs=40)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -724,8 +727,11 @@ def disable_host_services(token, host_uuid):
|
|||
api_cmd_payload = dict()
|
||||
api_cmd_payload['host'] = payload
|
||||
|
||||
# WARNING: Any change to the timeout must be reflected in the config.ini
|
||||
# file for the nfvi plugins.
|
||||
response = rest_api_request(token, "PUT", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=40)
|
||||
return response
|
||||
|
||||
|
||||
|
|
|
@ -60,7 +60,9 @@ def get_token(directory):
|
|||
}}}})
|
||||
request_info.add_data(payload)
|
||||
|
||||
request = urllib.request.urlopen(request_info)
|
||||
# WARNING: Any change to the timeout must be reflected in the config.ini
|
||||
# file for the nfvi plugins.
|
||||
request = urllib.request.urlopen(request_info, timeout=10)
|
||||
# Identity API v3 returns token id in X-Subject-Token
|
||||
# response header.
|
||||
token_id = request.info().getheader('X-Subject-Token')
|
||||
|
|
|
@ -287,8 +287,8 @@ def rest_api_get_server(host, port):
|
|||
return RestAPIServer(host, port)
|
||||
|
||||
|
||||
def _rest_api_request(token_id, method, api_cmd, api_cmd_headers=None,
|
||||
api_cmd_payload=None):
|
||||
def _rest_api_request(token_id, method, api_cmd, api_cmd_headers,
|
||||
api_cmd_payload, timeout_in_secs):
|
||||
"""
|
||||
Internal: make a rest-api request
|
||||
"""
|
||||
|
@ -320,7 +320,7 @@ def _rest_api_request(token_id, method, api_cmd, api_cmd_headers=None,
|
|||
# opener = urllib.request.build_opener(handler)
|
||||
# urllib.request.install_opener(opener)
|
||||
|
||||
request = urllib.request.urlopen(request_info)
|
||||
request = urllib.request.urlopen(request_info, timeout=timeout_in_secs)
|
||||
|
||||
headers = list() # list of tuples
|
||||
for key, value in request.info().items():
|
||||
|
@ -424,15 +424,29 @@ def _rest_api_request(token_id, method, api_cmd, api_cmd_headers=None,
|
|||
raise OpenStackException(method, api_cmd, api_cmd_headers,
|
||||
api_cmd_payload, str(e), str(e))
|
||||
|
||||
except Exception as e:
|
||||
now_ms = timers.get_monotonic_timestamp_in_ms()
|
||||
elapsed_ms = now_ms - start_ms
|
||||
|
||||
log_error("Rest-API failure, %s, %s, hdrs=%s, payload=%s, elapsed_ms=%s"
|
||||
% (method, api_cmd, api_cmd_headers, api_cmd_payload,
|
||||
int(elapsed_ms)))
|
||||
|
||||
raise OpenStackException(method, api_cmd, api_cmd_headers,
|
||||
api_cmd_payload, str(e), str(e))
|
||||
|
||||
|
||||
def rest_api_request(token, method, api_cmd, api_cmd_headers=None,
|
||||
api_cmd_payload=None):
|
||||
api_cmd_payload=None, timeout_in_secs=20):
|
||||
"""
|
||||
Make a rest-api request using the given token
|
||||
WARNING: Any change to the default timeout must be reflected in the timeout
|
||||
calculations done in the TaskFuture class.
|
||||
"""
|
||||
try:
|
||||
return _rest_api_request(token.get_id(), method, api_cmd,
|
||||
api_cmd_headers, api_cmd_payload)
|
||||
api_cmd_headers, api_cmd_payload,
|
||||
timeout_in_secs)
|
||||
|
||||
except OpenStackRestAPIException as e:
|
||||
if httplib.UNAUTHORIZED == e.http_status_code:
|
||||
|
@ -441,9 +455,12 @@ def rest_api_request(token, method, api_cmd, api_cmd_headers=None,
|
|||
|
||||
|
||||
def rest_api_request_with_context(context, method, api_cmd,
|
||||
api_cmd_headers=None, api_cmd_payload=None):
|
||||
api_cmd_headers=None, api_cmd_payload=None,
|
||||
timeout_in_secs=20):
|
||||
"""
|
||||
Make a rest-api request using the given context
|
||||
WARNING: Any change to the default timeout must be reflected in the timeout
|
||||
calculations done in the TaskFuture class.
|
||||
"""
|
||||
return _rest_api_request(context.token_id, method, api_cmd, api_cmd_headers,
|
||||
api_cmd_payload)
|
||||
api_cmd_payload, timeout_in_secs)
|
||||
|
|
|
@ -11,6 +11,10 @@ from nfv_plugins.nfvi_plugins.openstack.rest_api import rest_api_request
|
|||
|
||||
DLOG = debug.debug_get_logger('nfv_plugins.nfvi_plugins.openstack.sysinv')
|
||||
|
||||
# WARNING: Any change to this timeout must be reflected in the config.ini
|
||||
# file for the nfvi plugins.
|
||||
REST_API_REQUEST_TIMEOUT = 45
|
||||
|
||||
|
||||
def get_datanetworks(token, host_uuid):
|
||||
"""
|
||||
|
@ -25,7 +29,8 @@ def get_datanetworks(token, host_uuid):
|
|||
api_cmd_headers['Content-Type'] = "application/json"
|
||||
api_cmd_headers['User-Agent'] = "vim/1.0"
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd, api_cmd_headers)
|
||||
response = rest_api_request(token, "GET", api_cmd, api_cmd_headers,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
result_data = response.result_data['interface_datanetworks']
|
||||
|
||||
return result_data
|
||||
|
@ -42,7 +47,8 @@ def get_system_info(token):
|
|||
|
||||
api_cmd = url + "/isystems"
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd)
|
||||
response = rest_api_request(token, "GET", api_cmd,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -56,7 +62,8 @@ def get_hosts(token):
|
|||
|
||||
api_cmd = url + "/ihosts"
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd)
|
||||
response = rest_api_request(token, "GET", api_cmd,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -70,7 +77,8 @@ def get_host(token, host_uuid):
|
|||
|
||||
api_cmd = url + "/ihosts/%s" % host_uuid
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd)
|
||||
response = rest_api_request(token, "GET", api_cmd,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -84,7 +92,8 @@ def get_host_labels(token, host_uuid):
|
|||
|
||||
api_cmd = url + "/ihosts/%s/labels" % host_uuid
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd)
|
||||
response = rest_api_request(token, "GET", api_cmd,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -98,7 +107,8 @@ def get_upgrade(token):
|
|||
|
||||
api_cmd = url + "/upgrade"
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd)
|
||||
response = rest_api_request(token, "GET", api_cmd,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -120,7 +130,8 @@ def upgrade_start(token):
|
|||
api_cmd_payload['force'] = "false"
|
||||
|
||||
response = rest_api_request(token, "POST", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -147,7 +158,8 @@ def upgrade_activate(token):
|
|||
api_cmd_payload.append(host_data)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -165,7 +177,8 @@ def upgrade_complete(token):
|
|||
api_cmd_headers['Content-Type'] = "application/json"
|
||||
api_cmd_headers['User-Agent'] = "vim/1.0"
|
||||
|
||||
response = rest_api_request(token, "DELETE", api_cmd, api_cmd_headers)
|
||||
response = rest_api_request(token, "DELETE", api_cmd, api_cmd_headers,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -179,7 +192,8 @@ def get_host_lvgs(token, host_uuid):
|
|||
|
||||
api_cmd = url + "/ihosts/%s/ilvgs" % host_uuid
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd)
|
||||
response = rest_api_request(token, "GET", api_cmd,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -206,7 +220,8 @@ def notify_host_services_enabled(token, host_uuid):
|
|||
api_cmd_list.append(api_cmd_payload)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_list))
|
||||
json.dumps(api_cmd_list),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -233,7 +248,8 @@ def notify_host_services_disabled(token, host_uuid):
|
|||
api_cmd_list.append(api_cmd_payload)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_list))
|
||||
json.dumps(api_cmd_list),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -260,7 +276,8 @@ def notify_host_services_disable_extend(token, host_uuid):
|
|||
api_cmd_list.append(api_cmd_payload_action)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_list))
|
||||
json.dumps(api_cmd_list),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -293,7 +310,8 @@ def notify_host_services_disable_failed(token, host_uuid, reason):
|
|||
api_cmd_list.append(api_cmd_payload_reason)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_list))
|
||||
json.dumps(api_cmd_list),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -311,7 +329,8 @@ def notify_host_services_deleted(token, host_uuid):
|
|||
api_cmd_headers['Content-Type'] = "application/json"
|
||||
api_cmd_headers['User-Agent'] = "vim/1.0"
|
||||
|
||||
response = rest_api_request(token, "DELETE", api_cmd, api_cmd_headers)
|
||||
response = rest_api_request(token, "DELETE", api_cmd, api_cmd_headers,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -344,7 +363,8 @@ def notify_host_services_delete_failed(token, host_uuid, reason):
|
|||
api_cmd_list.append(api_cmd_payload_reason)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_list))
|
||||
json.dumps(api_cmd_list),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -371,7 +391,8 @@ def lock_host(token, host_uuid):
|
|||
api_cmd_payload.append(host_data)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -398,7 +419,8 @@ def unlock_host(token, host_uuid):
|
|||
api_cmd_payload.append(host_data)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -425,7 +447,8 @@ def reboot_host(token, host_uuid):
|
|||
api_cmd_payload.append(host_data)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -447,7 +470,8 @@ def upgrade_host(token, host_uuid):
|
|||
api_cmd_payload['force'] = "false"
|
||||
|
||||
response = rest_api_request(token, "POST", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -474,7 +498,8 @@ def swact_from_host(token, host_uuid):
|
|||
api_cmd_payload.append(host_data)
|
||||
|
||||
response = rest_api_request(token, "PATCH", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -492,7 +517,8 @@ def get_host_devices(token, host_uuid):
|
|||
api_cmd_headers['Content-Type'] = "application/json"
|
||||
api_cmd_headers['User-Agent'] = "vim/1.0"
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd, api_cmd_headers)
|
||||
response = rest_api_request(token, "GET", api_cmd, api_cmd_headers,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -510,7 +536,8 @@ def get_host_device(token, device_uuid):
|
|||
api_cmd_headers['Content-Type'] = "application/json"
|
||||
api_cmd_headers['User-Agent'] = "vim/1.0"
|
||||
|
||||
response = rest_api_request(token, "GET", api_cmd, api_cmd_headers)
|
||||
response = rest_api_request(token, "GET", api_cmd, api_cmd_headers,
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -531,7 +558,8 @@ def host_device_image_update(token, host_uuid):
|
|||
api_cmd_payload = dict()
|
||||
|
||||
response = rest_api_request(token, "POST", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
||||
|
||||
|
@ -552,5 +580,6 @@ def host_device_image_update_abort(token, host_uuid):
|
|||
api_cmd_payload = dict()
|
||||
|
||||
response = rest_api_request(token, "POST", api_cmd, api_cmd_headers,
|
||||
json.dumps(api_cmd_payload))
|
||||
json.dumps(api_cmd_payload),
|
||||
timeout_in_secs=REST_API_REQUEST_TIMEOUT)
|
||||
return response
|
||||
|
|
Loading…
Reference in New Issue