Implement poll util and plugin poll util
Implemented the polling utils module. Also added a simple usage of the plugin poll util for decommissioning datanodes in the Spark plugin. Partially implements bp: add-timeouts-for-polling. Change-Id: I62c4cd92925d0d09bd529cb1103519d3d84c2f1f
This commit is contained in:
parent
a0d6a938e2
commit
51eac34f2a
|
@ -284,14 +284,20 @@ class TimeoutException(SaharaException):
|
||||||
code = "TIMEOUT"
|
code = "TIMEOUT"
|
||||||
message = _("'%(operation)s' timed out after %(timeout)i second(s)")
|
message = _("'%(operation)s' timed out after %(timeout)i second(s)")
|
||||||
|
|
||||||
def __init__(self, timeout, op_name=None, timeout_name=None):
    """Build the timeout error message.

    :param timeout: timeout value in seconds that was exceeded
    :param op_name: optional name of the operation that timed out
    :param timeout_name: optional name of the violated timeout option;
        when given, it is appended to the message
    """
    if not op_name:
        op_name = _("Operation")
    else:
        op_name = _("Operation with name '%s'") % op_name
    self.message = self.message % {
        'operation': op_name, 'timeout': timeout}

    if timeout_name:
        desc = _("%(message)s and following timeout was violated: "
                 "%(timeout_name)s")
        self.message = desc % {
            'message': self.message, 'timeout_name': timeout_name}

    super(TimeoutException, self).__init__()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,16 +15,15 @@
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from oslo_utils import timeutils
|
|
||||||
import six
|
import six
|
||||||
|
|
||||||
from sahara import context
|
from sahara import context
|
||||||
from sahara.i18n import _
|
from sahara.i18n import _
|
||||||
from sahara.plugins import exceptions as ex
|
|
||||||
from sahara.plugins.spark import config_helper as c_helper
|
from sahara.plugins.spark import config_helper as c_helper
|
||||||
from sahara.plugins.spark import run_scripts as run
|
from sahara.plugins.spark import run_scripts as run
|
||||||
from sahara.plugins import utils
|
from sahara.plugins import utils
|
||||||
from sahara.utils import cluster_progress_ops as cpo
|
from sahara.utils import cluster_progress_ops as cpo
|
||||||
|
from sahara.utils import poll_utils
|
||||||
from sahara.utils import remote
|
from sahara.utils import remote
|
||||||
|
|
||||||
|
|
||||||
|
@ -55,6 +54,17 @@ def decommission_sl(master, inst_to_be_deleted, survived_inst):
|
||||||
run.start_spark_master(r_master, sp_home)
|
run.start_spark_master(r_master, sp_home)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_decommissioned(r, inst_to_be_deleted):
    """Report whether every instance slated for deletion is decommissioned.

    Runs ``hadoop dfsadmin -report`` on the remote and checks the
    decommission status of each datanode that belongs to an instance in
    *inst_to_be_deleted*.
    """
    report = r.execute_command("sudo -u hdfs hadoop dfsadmin -report")
    datanodes = parse_dfs_report(report[1])
    # Any datanode matched by internal IP that is not yet fully
    # decommissioned means the whole operation is still in progress.
    still_active = [
        dn
        for inst in inst_to_be_deleted
        for dn in datanodes
        if dn["Name"].startswith(inst.internal_ip)
        and dn["Decommission Status"] != "Decommissioned"
    ]
    return not still_active
|
||||||
|
|
||||||
|
|
||||||
@cpo.event_wrapper(True, step=_("Decommission %s") % "DataNodes")
|
@cpo.event_wrapper(True, step=_("Decommission %s") % "DataNodes")
|
||||||
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
|
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
|
||||||
with remote.get_remote(nn) as r:
|
with remote.get_remote(nn) as r:
|
||||||
|
@ -64,36 +74,15 @@ def decommission_dn(nn, inst_to_be_deleted, survived_inst):
|
||||||
run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
|
run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
|
||||||
context.sleep(3)
|
context.sleep(3)
|
||||||
|
|
||||||
timeout = c_helper.get_decommissioning_timeout(nn.cluster)
|
poll_utils.plugin_option_poll(
|
||||||
s_time = timeutils.utcnow()
|
nn.cluster, _is_decommissioned, c_helper.DECOMMISSIONING_TIMEOUT,
|
||||||
all_found = False
|
_("Decommission %s") % "DataNodes", 3, {
|
||||||
|
'r': r, 'inst_to_be_deleted': inst_to_be_deleted})
|
||||||
|
|
||||||
while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
|
r.write_files_to({
|
||||||
cmd = r.execute_command(
|
'/etc/hadoop/dn.incl': utils.
|
||||||
"sudo -u hdfs hadoop dfsadmin -report")
|
generate_fqdn_host_names(survived_inst),
|
||||||
all_found = True
|
'/etc/hadoop/dn.excl': ""})
|
||||||
datanodes_info = parse_dfs_report(cmd[1])
|
|
||||||
for i in inst_to_be_deleted:
|
|
||||||
for dn in datanodes_info:
|
|
||||||
if (dn["Name"].startswith(i.internal_ip)) and (
|
|
||||||
dn["Decommission Status"] != "Decommissioned"):
|
|
||||||
all_found = False
|
|
||||||
break
|
|
||||||
|
|
||||||
if all_found:
|
|
||||||
r.write_files_to({'/etc/hadoop/dn.incl':
|
|
||||||
utils.
|
|
||||||
generate_fqdn_host_names(survived_inst),
|
|
||||||
'/etc/hadoop/dn.excl': "",
|
|
||||||
})
|
|
||||||
break
|
|
||||||
context.sleep(3)
|
|
||||||
|
|
||||||
if not all_found:
|
|
||||||
ex.DecommissionError(
|
|
||||||
_("Cannot finish decommission of cluster %(cluster)s in "
|
|
||||||
"%(seconds)d seconds") %
|
|
||||||
{"cluster": nn.cluster, "seconds": timeout})
|
|
||||||
|
|
||||||
|
|
||||||
def parse_dfs_report(cmd_output):
|
def parse_dfs_report(cmd_output):
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
# Copyright (c) 2015 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
import six
|
||||||
|
|
||||||
|
from sahara.tests.unit import base
|
||||||
|
from sahara.utils import poll_utils
|
||||||
|
|
||||||
|
|
||||||
|
class TestPollUtils(base.SaharaTestCase):
    """Unit tests for sahara.utils.poll_utils.poll."""

    @mock.patch('sahara.utils.poll_utils.LOG.debug')
    def test_poll_success(self, logger):
        # An immediately-true status should log success exactly once.
        poll_utils.poll(get_status=lambda: True,
                        kwargs={}, timeout=5, sleep=3)
        success_call = mock.call(
            'Operation was executed successfully in timeout 5')
        self.assertEqual(1, logger.call_count)
        self.assertEqual([success_call], logger.call_args_list)

    @mock.patch('sahara.context.sleep', return_value=None)
    @mock.patch('sahara.utils.poll_utils._get_consumed', return_value=0)
    def test_poll_failed_first_scenario(self, p_1, p_2):
        # Zero timeout with a never-true status raises TimeoutException.
        message = ""
        try:
            poll_utils.poll(get_status=lambda: False, kwargs={},
                            timeout=0, sleep=3)
        except Exception as e:
            message = six.text_type(e)

        # Strip the trailing "Error ID" line that Sahara appends.
        if 'Error ID' in message:
            message = message.split("\n")[0]
        expected_message = "'Operation' timed out after 0 second(s)"

        self.assertEqual(expected_message, message)

    @mock.patch('sahara.context.sleep', return_value=None)
    @mock.patch('sahara.utils.poll_utils._get_consumed', return_value=0)
    def test_poll_failed_second_scenario(self, p_1, p_2):
        # Supplying timeout_name extends the error message with the
        # violated timeout option.
        message = ""
        try:
            poll_utils.poll(get_status=lambda: False, kwargs={},
                            timeout=0, sleep=3,
                            timeout_name="some timeout")
        except Exception as e:
            message = six.text_type(e)

        if 'Error ID' in message:
            message = message.split("\n")[0]
        expected_message = ("'Operation' timed out after 0 second(s) and "
                            "following timeout was violated: some timeout")

        self.assertEqual(expected_message, message)
|
|
@ -0,0 +1,111 @@
|
||||||
|
# Copyright (c) 2015 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from oslo_log import log as logging
|
||||||
|
from oslo_utils import timeutils
|
||||||
|
|
||||||
|
from sahara import context
|
||||||
|
from sahara import exceptions as ex
|
||||||
|
from sahara.utils import general
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# set 3 hours timeout by default
|
||||||
|
DEFAULT_TIMEOUT = 10800
|
||||||
|
DEFAULT_SLEEP_TIME = 5
|
||||||
|
|
||||||
|
|
||||||
|
def _get_consumed(started_at):
    """Return the number of seconds elapsed since *started_at*."""
    now = timeutils.utcnow()
    return timeutils.delta_seconds(started_at, now)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_current_value(cluster, option):
|
||||||
|
option_target = option.applicable_target
|
||||||
|
conf = cluster.cluster_configs
|
||||||
|
if option_target in conf and option.name in conf[option_target]:
|
||||||
|
return conf[option_target][option.name]
|
||||||
|
return option.default_value
|
||||||
|
|
||||||
|
|
||||||
|
def poll(get_status, kwargs, operation_name=None, timeout_name=None,
         timeout=DEFAULT_TIMEOUT, sleep=DEFAULT_SLEEP_TIME,
         exception_strategy='raise'):
    """Poll the status returned by get_status until it becomes True.

    :param get_status: function which returns the current status of the
        polled operation as a Boolean
    :param kwargs: keyword arguments of the get_status function
    :param operation_name: name of the polling process, used in the
        success log message and the timeout error
    :param timeout_name: name of the timeout option, reported when the
        timeout is violated
    :param timeout: value of the timeout in seconds. By default, it equals
        to 3 hours
    :param sleep: duration in seconds between two consecutive executions
        of the get_status function
    :param exception_strategy: possible values ('raise', 'mark_as_true',
        'mark_as_false'). If exception_strategy is 'raise' the exception
        would be raised. If exception_strategy is 'mark_as_true', the
        return value of get_status would be marked as True, and in case of
        'mark_as_false' - False. By default it's 'raise'.
    :raises TimeoutException: if the status is still False once *timeout*
        seconds have been consumed
    """
    start_time = timeutils.utcnow()
    # We shouldn't raise TimeoutException if an incorrect timeout is
    # specified and the status is ok now. In such a way we should execute
    # get_status at least once.
    at_least_once = True

    while at_least_once or _get_consumed(start_time) < timeout:
        at_least_once = False
        try:
            status = get_status(**kwargs)
        except Exception:
            # NOTE: only Exception is caught here (not BaseException) so
            # that SystemExit and KeyboardInterrupt are never silently
            # converted to a status by the 'mark_as_*' strategies.
            if exception_strategy == 'raise':
                raise
            elif exception_strategy == 'mark_as_true':
                status = True
            else:
                status = False

        if status:
            operation = "Operation"
            if operation_name:
                operation = "Operation with name {op_name}".format(
                    op_name=operation_name)
            LOG.debug(
                '{operation_desc} was executed successfully in timeout '
                '{timeout}'
                .format(operation_desc=operation, timeout=timeout))
            return

        context.sleep(sleep)
    raise ex.TimeoutException(timeout, operation_name, timeout_name)
|
||||||
|
|
||||||
|
|
||||||
|
def plugin_option_poll(cluster, get_status, option, operation_name, sleep_time,
                       kwargs):
    """Poll get_status using a timeout taken from a plugin config option.

    The timeout value is resolved from the cluster's configuration for
    *option* (falling back to the option's default). Polling is reported
    as successful immediately if the cluster no longer exists.
    """

    def _status_or_gone(n_cluster, n_kwargs):
        # A deleted cluster counts as "done" so polling doesn't spin
        # until the timeout for an object that is already gone.
        if not general.check_cluster_exists(n_cluster):
            return True
        return get_status(**n_kwargs)

    poll(get_status=_status_or_gone,
         kwargs={'n_cluster': cluster, 'n_kwargs': kwargs},
         timeout=_get_current_value(cluster, option),
         operation_name=operation_name,
         sleep=sleep_time,
         timeout_name=option.name)
|
Loading…
Reference in New Issue