Adding cluster, instance, job_execution ids to logs
implements bp: logs-improvement

Change-Id: Ic4b64fa44e5a9a8bddcceef87fe5c706e31048d9

parent 477a72f3e4
commit d39332a686
@@ -301,3 +301,29 @@ class InstanceInfoManager(object):
 
     def __exit__(self, *args):
         current().current_instance_info = self.prev_instance_info
+
+
+def set_current_cluster_id(cluster_id):
+    current().resource_uuid = 'none, cluster: %s' % cluster_id
+
+
+def set_current_job_execution_id(je_id):
+    current().resource_uuid = 'none, job_execution: %s' % je_id
+
+
+class SetCurrentInstanceId(object):
+    def __init__(self, instance_id):
+        ctx = current()
+        self.prev_uuid = ctx.resource_uuid
+        if ctx.resource_uuid:
+            ctx.resource_uuid = ctx.resource_uuid.replace('none', instance_id)
+
+    def __enter__(self):
+        pass
+
+    def __exit__(self, *ex):
+        current().resource_uuid = self.prev_uuid
+
+
+def set_current_instance_id(instance_id):
+    return SetCurrentInstanceId(instance_id)
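The helpers above are the whole mechanism: an operation stamps the context's resource_uuid with a cluster or job-execution id up front, and per-instance work is wrapped so the 'none' placeholder is temporarily replaced by the instance id and restored on exit. A minimal usage sketch follows; the function, objects and LOG setup are illustrative only, and how resource_uuid ends up rendered in the final log line is left to the oslo.log context formatter configuration.

    from oslo_log import log as logging

    from sahara import context

    LOG = logging.getLogger(__name__)


    def configure_cluster_example(cluster, instances):
        # Hypothetical operation: tag every record logged for this request.
        context.set_current_cluster_id(cluster.id)
        LOG.info("Starting configuration")  # resource_uuid: 'none, cluster: <id>'

        for instance in instances:
            # Swap the 'none' placeholder for the instance id; the previous
            # value is restored when the with block exits.
            with context.set_current_instance_id(instance.instance_id):
                LOG.info("Configuring instance")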
@@ -187,6 +187,8 @@ class AmbariPlugin(p.ProvisioningPluginBase):
             _("Provision cluster via Ambari"), len(servers))
 
         for server in servers:
+            with context.set_current_instance_id(
+                    server.instance['instance_id']):
                 self._spawn(
                     "hdp-provision-instance-%s" % server.instance.hostname(),
                     server.provision_ambari, ambari_info, cluster_spec)

@@ -326,6 +328,8 @@ class AmbariPlugin(p.ProvisioningPluginBase):
             cluster.id, _("Provision cluster via Ambari"), len(servers))
 
         for server in servers:
+            with context.set_current_instance_id(
+                    server.instance['instance_id']):
                 self._spawn('Ambari provisioning thread',
                             server.provision_ambari, ambari_info, cluster_spec)

@@ -124,6 +124,8 @@ class VersionHandler(avm.AbstractVersionHandler):
                 len(servers))
 
         for server in servers:
+            with context.set_current_instance_id(
+                    server.instance['instance_id']):
                 server.install_swift_integration()
 
     def get_services_processor(self):

@@ -330,10 +332,11 @@ class AmbariClient(object):
         add_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                                   '/hosts/{2}/host_components/{3}')
         for host in servers:
+            with context.set_current_instance_id(host.instance['instance_id']):
                 hostname = host.instance.fqdn().lower()
                 result = self._post(
-                add_host_url.format(ambari_info.get_address(), name, hostname),
-                ambari_info)
+                    add_host_url.format(ambari_info.get_address(), name,
+                                        hostname), ambari_info)
                 if result.status_code != 201:
                     LOG.error(
                         _LE('Create host command failed. {result}').format(

@@ -348,8 +351,8 @@ class AmbariClient(object):
                     # don't add any AMBARI components
                     if component.find('AMBARI') != 0:
                         result = self._post(add_host_component_url.format(
-                        ambari_info.get_address(), name, hostname, component),
-                        ambari_info)
+                            ambari_info.get_address(), name, hostname,
+                            component), ambari_info)
                         if result.status_code != 201:
                             LOG.error(
                                 _LE('Create host_component command failed. '
@@ -19,6 +19,7 @@ from oslo_config import cfg
 from oslo_log import log as logging
 import six
 
+from sahara import context
 from sahara import exceptions as e
 from sahara.i18n import _
 from sahara.i18n import _LI

@@ -1268,6 +1269,8 @@ class HueService(Service):
             for ng in hue_ngs:
                 if ng.instances:
                     for instance in ng.instances:
+                        with context.set_current_instance_id(
+                                instance.instance_id):
                             HueService._handle_pre_service_start(instance,
                                                                  cluster_spec,
                                                                  hue_ini,

@@ -114,6 +114,8 @@ class VersionHandler(avm.AbstractVersionHandler):
                 len(servers))
 
         for server in servers:
+            with context.set_current_instance_id(
+                    server.instance['instance_id']):
                 server.install_swift_integration()
 
     def get_services_processor(self):

@@ -308,10 +310,11 @@ class AmbariClient(object):
         add_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                                   '/hosts/{2}/host_components/{3}')
         for host in servers:
+            with context.set_current_instance_id(host.instance['instance_id']):
                 hostname = host.instance.fqdn().lower()
                 result = self._post(
-                add_host_url.format(ambari_info.get_address(), name, hostname),
-                ambari_info)
+                    add_host_url.format(ambari_info.get_address(), name,
+                                        hostname), ambari_info)
                 if result.status_code != 201:
                     LOG.error(
                         _LE('Create host command failed. {result}').format(

@@ -329,8 +332,8 @@ class AmbariClient(object):
                     if (component.find('AMBARI') != 0
                             and component.find('HUE') != 0):
                         result = self._post(add_host_component_url.format(
-                        ambari_info.get_address(), name, hostname, component),
-                        ambari_info)
+                            ambari_info.get_address(), name, hostname,
+                            component), ambari_info)
                         if result.status_code != 201:
                             LOG.error(
                                 _LE('Create host_component command failed. '
@@ -17,6 +17,7 @@ from oslo_config import cfg
 from oslo_log import log as logging
 import six
 
+from sahara import context
 from sahara.i18n import _
 from sahara.i18n import _LW
 from sahara.plugins.vanilla.hadoop2 import config_helper as c_helper

@@ -63,6 +64,7 @@ def configure_instances(pctx, instances):
         instances[0].cluster_id, _("Configure instances"), len(instances))
 
     for instance in instances:
+        with context.set_current_instance_id(instance.instance_id):
             _configure_instance(pctx, instance)

@@ -45,6 +45,7 @@ def start_dn_nm_processes(instances):
 
     with context.ThreadGroup() as tg:
         for instance in instances:
+            with context.set_current_instance_id(instance.instance_id):
                 processes = set(instance.node_group.node_processes)
                 processes = processes.intersection(filternames)
                 tg.spawn('vanilla-start-processes-%s' % instance.instance_name,

@@ -80,6 +81,7 @@ def start_historyserver(instance):
 
 @cpo.event_wrapper(True, step=pu.start_process_event_message("Oozie"))
 def start_oozie_process(pctx, instance):
+    with context.set_current_instance_id(instance.instance_id):
         with instance.remote() as r:
             if c_helper.is_mysql_enabled(pctx, instance.cluster):
                 _start_mysql(r)

@@ -208,6 +210,7 @@ def _hive_metastore_start(remote):
 
 @cpo.event_wrapper(True, step=pu.start_process_event_message("HiveServer"))
 def start_hiveserver_process(pctx, instance):
+    with context.set_current_instance_id(instance.instance_id):
         with instance.remote() as r:
             _hive_create_warehouse_dir(r)
             _hive_copy_shared_conf(
@@ -159,12 +159,14 @@ class VersionHandler(avm.AbstractVersionHandler):
         nn_instance = vu.get_namenode(cluster)
 
         with remote.get_remote(oozie) as r:
+            with context.set_current_instance_id(oozie.instance_id):
                 if c_helper.is_mysql_enable(cluster):
                     run.mysql_start(r, oozie)
                 run.oozie_create_db(r)
                 run.oozie_share_lib(r, nn_instance.hostname())
                 run.start_oozie(r)
-                LOG.info(_LI("Oozie service at {host} has been started").format(
+                LOG.info(
+                    _LI("Oozie service at {host} has been started").format(
                         host=nn_instance.hostname()))
 
     def start_hiveserver(self, cluster):

@@ -178,6 +180,7 @@ class VersionHandler(avm.AbstractVersionHandler):
         oozie = vu.get_oozie(cluster)
 
         with remote.get_remote(hive_server) as r:
+            with context.set_current_instance_id(hive_server.instance_id):
                 run.hive_create_warehouse_dir(r)
                 run.hive_copy_shared_conf(
                     r, edp.get_hive_shared_conf_path('hadoop'))
@@ -54,6 +54,7 @@ def get_cluster(id, show_progress=False):
 
 
 def scale_cluster(id, data):
+    context.set_current_cluster_id(id)
     ctx = context.ctx()
 
     cluster = conductor.cluster_get(ctx, id)

@@ -96,6 +97,7 @@ def scale_cluster(id, data):
 def create_cluster(values):
     ctx = context.ctx()
     cluster = conductor.cluster_create(ctx, values)
+    context.set_current_cluster_id(cluster.id)
     sender.notify(ctx, cluster.id, cluster.name, "New",
                   "create")
     plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

@@ -124,6 +126,7 @@ def _add_ports_for_auto_sg(ctx, cluster, plugin):
 
 
 def terminate_cluster(id):
+    context.set_current_cluster_id(id)
     cluster = g.change_cluster_status(id, "Deleting")
 
     OPS.terminate_cluster(id)
@@ -282,6 +282,7 @@ class DirectEngine(e.Engine):
         cluster = g.change_cluster_status(cluster, "Deleting Instances")
 
         for instance in instances_to_delete:
+            with context.set_current_instance_id(instance.instance_id):
                 self._shutdown_instance(instance)
 
         self._await_deleted(cluster, instances_to_delete)

@@ -418,6 +419,7 @@ class DirectEngine(e.Engine):
 
     def _assign_floating_ips(self, instances):
         for instance in instances:
+            with context.set_current_instance_id(instance.instance_id):
                 node_group = instance.node_group
                 if node_group.floating_ip_pool:
                     networks.assign_floating_ip(instance.instance_id,

@@ -431,6 +433,7 @@ class DirectEngine(e.Engine):
             return True
         for instance in instances:
             if instance.id not in active_ids:
+                with context.set_current_instance_id(instance.instance_id):
                     if self._check_if_active(instance):
                         active_ids.add(instance.id)
                         cpo.add_successful_event(instance)

@@ -460,6 +463,7 @@ class DirectEngine(e.Engine):
 
         for instance in instances:
             if instance.id not in deleted_ids:
+                with context.set_current_instance_id(instance.instance_id):
                     if self._check_if_deleted(instance):
                         LOG.debug("Instance {instance} is deleted".format(
                             instance=instance.instance_name))

@@ -503,6 +507,7 @@ class DirectEngine(e.Engine):
         """Attempt to rollback cluster scaling."""
 
         for i in instances:
+            with context.set_current_instance_id(i.instance_id):
                 self._shutdown_instance(i)
 
         cluster = conductor.cluster_get(context.ctx(), cluster)

@@ -511,6 +516,7 @@ class DirectEngine(e.Engine):
     def _shutdown_instances(self, cluster):
         for node_group in cluster.node_groups:
             for instance in node_group.instances:
+                with context.set_current_instance_id(instance.instance_id):
                     self._shutdown_instance(instance)
 
             self._await_deleted(cluster, node_group.instances)
@@ -123,6 +123,7 @@ def execute_job(job_id, data):
                    'info': {'status': edp.JOB_STATUS_PENDING},
                    'job_configs': configs, 'extra': {}}
     job_execution = conductor.job_execution_create(context.ctx(), job_ex_dict)
+    context.set_current_job_execution_id(job_execution.id)
 
     # check to use proxy user
     if p.job_execution_requires_proxy_user(job_execution):

@@ -153,6 +154,7 @@ def get_job_execution(id):
 
 
 def cancel_job_execution(id):
+    context.set_current_job_execution_id(id)
     job_execution = conductor.job_execution_get(context.ctx(), id)
     OPS.cancel_job_execution(id)

@@ -160,6 +162,7 @@ def cancel_job_execution(id):
 
 
 def delete_job_execution(id):
+    context.set_current_job_execution_id(id)
     OPS.delete_job_execution(id)
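The job-execution entry points above follow the same convention as the cluster API: the id is recorded on the context as soon as the request arrives. Given the string handling in the context helpers earlier in this change, resource_uuid evolves roughly as sketched below (the ids are illustrative, and this assumes a request context is already associated with the current thread):

    from sahara import context

    context.set_current_job_execution_id('je-42')
    # context.current().resource_uuid is now 'none, job_execution: je-42'

    with context.set_current_instance_id('inst-7'):
        # 'none' is replaced while per-instance work runs:
        # 'inst-7, job_execution: je-42'
        pass

    # on exit the previous value is restored:
    # 'none, job_execution: je-42'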
@@ -72,6 +72,7 @@ class Engine(object):
             return True
         for instance in instances:
             if instance.id not in ips_assigned:
+                with context.set_current_instance_id(instance.instance_id):
                     if networks.init_instances_ips(instance):
                         ips_assigned.add(instance.id)
                         cpo.add_successful_event(instance)

@@ -98,6 +99,7 @@ class Engine(object):
 
         with context.ThreadGroup() as tg:
             for instance in instances:
+                with context.set_current_instance_id(instance.instance_id):
                     tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                              self._wait_until_accessible, instance)

@@ -148,7 +150,9 @@ class Engine(object):
         with context.ThreadGroup() as tg:
             for node_group in cluster.node_groups:
                 for instance in node_group.instances:
-                    tg.spawn("configure-instance-%s" % instance.instance_name,
+                    with context.set_current_instance_id(instance.instance_id):
+                        tg.spawn(
+                            "configure-instance-%s" % instance.instance_name,
                             self._configure_instance, instance, hosts_file)
 
     @cpo.event_wrapper(mark_successful_on_exit=True)
@@ -81,6 +81,7 @@ def get_time_since_last_update(cluster):
 def terminate_cluster(ctx, cluster, description):
     if CONF.use_identity_api_v3:
         trusts.use_os_admin_auth_token(cluster)
+    context.set_current_cluster_id(cluster.id)
 
     LOG.debug('Terminating {description} cluster {cluster} '
               'in "{status}" state with id {id}'
@@ -59,9 +59,11 @@ def attach_to_instances(instances):
     with context.ThreadGroup() as tg:
         for instance in instances:
             if instance.node_group.volumes_per_node > 0:
+                with context.set_current_instance_id(instance.instance_id):
                     tg.spawn(
-                    'attach-volumes-for-instance-%s' % instance.instance_name,
-                    _attach_volumes_to_node, instance.node_group, instance)
+                        'attach-volumes-for-instance-%s'
+                        % instance.instance_name, _attach_volumes_to_node,
+                        instance.node_group, instance)
 
 
 @poll_utils.poll_status(

@@ -156,12 +158,13 @@ def mount_to_instances(instances):
 
     with context.ThreadGroup() as tg:
         for instance in instances:
+            with context.set_current_instance_id(instance.instance_id):
                 devices = _find_instance_volume_devices(instance)
 
-                # Since formatting can take several minutes (for large disks) and
-                # can be done in parallel, launch one thread per disk.
+                # Since formating can take several minutes (for large disks)
+                # and can be done in parallel, launch one thread per disk.
                 for idx in range(0, instance.node_group.volumes_per_node):
-                    tg.spawn('mount-volume-%d-to-node-%s' %
+                    tg.spawn(
+                        'mount-volume-%d-to-node-%s' %
                         (idx, instance.instance_name),
                         _mount_volume_to_node, instance, idx, devices[idx])
@@ -48,8 +48,9 @@ class FakeNodeGroup(object):
 
 
 class FakeInstance(object):
-    def __init__(self, inst_name, management_ip, user, priv_key):
+    def __init__(self, inst_name, inst_id, management_ip, user, priv_key):
         self.instance_name = inst_name
+        self.instance_id = inst_id
         self.management_ip = management_ip
         self.node_group = FakeNodeGroup(user, priv_key)

@@ -107,7 +108,7 @@ class TestInstanceInteropHelper(base.SaharaTestCase):
     def test_use_floating_ips(self, p_adapter):
         self.override_config('use_floating_ips', True)
 
-        instance = FakeInstance('inst1', '10.0.0.1', 'user1', 'key1')
+        instance = FakeInstance('inst1', '123', '10.0.0.1', 'user1', 'key1')
         remote = ssh_remote.InstanceInteropHelper(instance)
 
         # Test SSH

@@ -128,7 +129,7 @@ class TestInstanceInteropHelper(base.SaharaTestCase):
         self.override_config('use_floating_ips', False)
         self.override_config('use_namespaces', True)
 
-        instance = FakeInstance('inst2', '10.0.0.2', 'user2', 'key2')
+        instance = FakeInstance('inst2', '123', '10.0.0.2', 'user2', 'key2')
         remote = ssh_remote.InstanceInteropHelper(instance)
 
         # Test SSH

@@ -152,7 +153,7 @@ class TestInstanceInteropHelper(base.SaharaTestCase):
     def test_proxy_command(self, p_adapter, p_simple_exec_func):
         self.override_config('proxy_command', 'ssh fakerelay nc {host} {port}')
 
-        instance = FakeInstance('inst3', '10.0.0.3', 'user3', 'key3')
+        instance = FakeInstance('inst3', '123', '10.0.0.3', 'user3', 'key3')
         remote = ssh_remote.InstanceInteropHelper(instance)
 
         # Test SSH

@@ -171,7 +172,7 @@ class TestInstanceInteropHelper(base.SaharaTestCase):
     def test_proxy_command_bad(self):
         self.override_config('proxy_command', '{bad_kw} nc {host} {port}')
 
-        instance = FakeInstance('inst4', '10.0.0.4', 'user4', 'key4')
+        instance = FakeInstance('inst4', '123', '10.0.0.4', 'user4', 'key4')
         remote = ssh_remote.InstanceInteropHelper(instance)
 
         # Test SSH
@@ -763,8 +763,8 @@ class InstanceInteropHelper(remote.Remote):
         self._run_s(_execute_on_vm_interactive, timeout, cmd, matcher)
 
     def _log_command(self, str):
-        LOG.debug('[{instance}] {command}'.format(
-            instance=self.instance.instance_name, command=str))
+        with context.set_current_instance_id(self.instance.instance_id):
+            LOG.debug(str)
 
 
 class BulkInstanceInteropHelper(InstanceInteropHelper):