diff --git a/releasenotes/notes/remove-cdh_5.0_5.3_5.4-b5f140e9b0233c07.yaml b/releasenotes/notes/remove-cdh_5.0_5.3_5.4-b5f140e9b0233c07.yaml
new file mode 100644
index 00000000..af028395
--- /dev/null
+++ b/releasenotes/notes/remove-cdh_5.0_5.3_5.4-b5f140e9b0233c07.yaml
@@ -0,0 +1,3 @@
+---
+features:
+  - Versions 5.0.0, 5.3.0, and 5.4.0 of the Cloudera plugin are removed.
diff --git a/sahara/plugins/cdh/cloudera_utils.py b/sahara/plugins/cdh/cloudera_utils.py
index 91c9e977..fafcf437 100644
--- a/sahara/plugins/cdh/cloudera_utils.py
+++ b/sahara/plugins/cdh/cloudera_utils.py
@@ -23,6 +23,7 @@ from sahara.i18n import _
 from sahara.plugins.cdh.client import api_client
 from sahara.plugins.cdh.client import services
 from sahara.plugins.cdh import db_helper
+from sahara.plugins.cdh import plugin_utils
 from sahara.plugins import exceptions as ex
 from sahara.plugins import kerberos
 from sahara.topology import topology_helper as t_helper
@@ -62,8 +63,7 @@ class ClouderaUtils(object):
     HBASE_SERVICE_NAME = 'hbase01'
 
     def __init__(self):
-        # pu will be defined in derived class.
-        self.pu = None
+        self.pu = plugin_utils.AbstractPluginUtils()
 
     def get_api_client_by_default_password(self, cluster):
         manager_ip = self.pu.get_manager(cluster).management_ip
diff --git a/sahara/plugins/cdh/plugin.py b/sahara/plugins/cdh/plugin.py
index abe8db8d..e218ab63 100644
--- a/sahara/plugins/cdh/plugin.py
+++ b/sahara/plugins/cdh/plugin.py
@@ -35,15 +35,10 @@ class CDHPluginProvider(p.ProvisioningPluginBase):
     def get_labels(self):
         default = {'enabled': {'status': True}, 'stable': {'status': True}}
         result = {'plugin_labels': copy.deepcopy(default)}
-        deprecated = {'enabled': {'status': True},
-                      'deprecated': {'status': True}}
         result['version_labels'] = {
             '5.9.0': copy.deepcopy(default),
             '5.7.0': copy.deepcopy(default),
             '5.5.0': copy.deepcopy(default),
-            '5.4.0': copy.deepcopy(deprecated),
-            '5.3.0': copy.deepcopy(deprecated),
-            '5': copy.deepcopy(deprecated),
         }
         return result
 
diff --git a/sahara/plugins/cdh/v5/__init__.py b/sahara/plugins/cdh/v5/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/sahara/plugins/cdh/v5/cloudera_utils.py b/sahara/plugins/cdh/v5/cloudera_utils.py
deleted file mode 100644
index a713eb0f..00000000
--- a/sahara/plugins/cdh/v5/cloudera_utils.py
+++ /dev/null
@@ -1,252 +0,0 @@
-# Copyright (c) 2014 Mirantis Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -from sahara.i18n import _ -from sahara.plugins.cdh import cloudera_utils as cu -from sahara.plugins.cdh.v5 import config_helper -from sahara.plugins.cdh.v5 import plugin_utils as pu -from sahara.plugins.cdh.v5 import validation -from sahara.swift import swift_helper -from sahara.utils import cluster_progress_ops as cpo -from sahara.utils import configs as s_cfg -from sahara.utils import xmlutils - - -HDFS_SERVICE_TYPE = 'HDFS' -YARN_SERVICE_TYPE = 'YARN' -OOZIE_SERVICE_TYPE = 'OOZIE' -HIVE_SERVICE_TYPE = 'HIVE' -HUE_SERVICE_TYPE = 'HUE' -SPARK_SERVICE_TYPE = 'SPARK_ON_YARN' -ZOOKEEPER_SERVICE_TYPE = 'ZOOKEEPER' -HBASE_SERVICE_TYPE = 'HBASE' - -c_helper = config_helper.ConfigHelperV5() - - -class ClouderaUtilsV5(cu.ClouderaUtils): - - def __init__(self): - cu.ClouderaUtils.__init__(self) - self.pu = pu.PluginUtilsV5() - self.validator = validation.ValidatorV5 - - @cu.cloudera_cmd - def format_namenode(self, hdfs_service): - for nn in hdfs_service.get_roles_by_type('NAMENODE'): - yield hdfs_service.format_hdfs(nn.name)[0] - - @cu.cloudera_cmd - def create_hdfs_tmp(self, hdfs_service): - yield hdfs_service.create_hdfs_tmp() - - @cu.cloudera_cmd - def create_yarn_job_history_dir(self, yarn_service): - yield yarn_service.create_yarn_job_history_dir() - - @cu.cloudera_cmd - def create_oozie_db(self, oozie_service): - yield oozie_service.create_oozie_db() - - @cu.cloudera_cmd - def install_oozie_sharelib(self, oozie_service): - yield oozie_service.install_oozie_sharelib() - - @cu.cloudera_cmd - def create_hive_metastore_db(self, hive_service): - yield hive_service.create_hive_metastore_tables() - - @cu.cloudera_cmd - def create_hive_dirs(self, hive_service): - yield hive_service.create_hive_userdir() - yield hive_service.create_hive_warehouse() - - @cu.cloudera_cmd - def create_hbase_root(self, hbase_service): - yield hbase_service.create_hbase_root() - - @cpo.event_wrapper(True, step=_("Create services"), param=('cluster', 1)) - def create_services(self, cluster): - api = self.get_api_client(cluster) - - fullversion = ('5.0.0' if cluster.hadoop_version == '5' - else cluster.hadoop_version) - cm_cluster = api.create_cluster(cluster.name, - fullVersion=fullversion) - - if len(self.pu.get_zookeepers(cluster)) > 0: - cm_cluster.create_service(self.ZOOKEEPER_SERVICE_NAME, - ZOOKEEPER_SERVICE_TYPE) - cm_cluster.create_service(self.HDFS_SERVICE_NAME, HDFS_SERVICE_TYPE) - cm_cluster.create_service(self.YARN_SERVICE_NAME, YARN_SERVICE_TYPE) - cm_cluster.create_service(self.OOZIE_SERVICE_NAME, OOZIE_SERVICE_TYPE) - if self.pu.get_hive_metastore(cluster): - cm_cluster.create_service(self.HIVE_SERVICE_NAME, - HIVE_SERVICE_TYPE) - if self.pu.get_hue(cluster): - cm_cluster.create_service(self.HUE_SERVICE_NAME, HUE_SERVICE_TYPE) - if self.pu.get_spark_historyserver(cluster): - cm_cluster.create_service(self.SPARK_SERVICE_NAME, - SPARK_SERVICE_TYPE) - if self.pu.get_hbase_master(cluster): - cm_cluster.create_service(self.HBASE_SERVICE_NAME, - HBASE_SERVICE_TYPE) - - def await_agents(self, cluster, instances): - self._await_agents(cluster, instances, c_helper.AWAIT_AGENTS_TIMEOUT) - - @cpo.event_wrapper( - True, step=_("Configure services"), param=('cluster', 1)) - def configure_services(self, cluster): - cm_cluster = self.get_cloudera_cluster(cluster) - - if len(self.pu.get_zookeepers(cluster)) > 0: - zookeeper = cm_cluster.get_service(self.ZOOKEEPER_SERVICE_NAME) - zookeeper.update_config(self._get_configs(ZOOKEEPER_SERVICE_TYPE, - cluster=cluster)) - - hdfs = cm_cluster.get_service(self.HDFS_SERVICE_NAME) 
- hdfs.update_config(self._get_configs(HDFS_SERVICE_TYPE, - cluster=cluster)) - - yarn = cm_cluster.get_service(self.YARN_SERVICE_NAME) - yarn.update_config(self._get_configs(YARN_SERVICE_TYPE, - cluster=cluster)) - - oozie = cm_cluster.get_service(self.OOZIE_SERVICE_NAME) - oozie.update_config(self._get_configs(OOZIE_SERVICE_TYPE, - cluster=cluster)) - - if self.pu.get_hive_metastore(cluster): - hive = cm_cluster.get_service(self.HIVE_SERVICE_NAME) - hive.update_config(self._get_configs(HIVE_SERVICE_TYPE, - cluster=cluster)) - - if self.pu.get_hue(cluster): - hue = cm_cluster.get_service(self.HUE_SERVICE_NAME) - hue.update_config(self._get_configs(HUE_SERVICE_TYPE, - cluster=cluster)) - - if self.pu.get_spark_historyserver(cluster): - spark = cm_cluster.get_service(self.SPARK_SERVICE_NAME) - spark.update_config(self._get_configs(SPARK_SERVICE_TYPE, - cluster=cluster)) - - if self.pu.get_hbase_master(cluster): - hbase = cm_cluster.get_service(self.HBASE_SERVICE_NAME) - hbase.update_config(self._get_configs(HBASE_SERVICE_TYPE, - cluster=cluster)) - - def _get_configs(self, service, cluster=None, instance=None): - def get_hadoop_dirs(mount_points, suffix): - return ','.join([x + suffix for x in mount_points]) - - all_confs = {} - if cluster: - zk_count = self.validator._get_inst_count(cluster, - 'ZOOKEEPER_SERVER') - core_site_safety_valve = '' - if self.pu.c_helper.is_swift_enabled(cluster): - configs = swift_helper.get_swift_configs() - confs = {c['name']: c['value'] for c in configs} - core_site_safety_valve = xmlutils.create_elements_xml(confs) - all_confs = { - 'HDFS': { - 'zookeeper_service': - self.ZOOKEEPER_SERVICE_NAME if zk_count else '', - 'core_site_safety_valve': core_site_safety_valve - }, - 'HIVE': { - 'mapreduce_yarn_service': self.YARN_SERVICE_NAME, - 'zookeeper_service': - self.ZOOKEEPER_SERVICE_NAME if zk_count else '' - }, - 'OOZIE': { - 'mapreduce_yarn_service': self.YARN_SERVICE_NAME, - 'zookeeper_service': - self.ZOOKEEPER_SERVICE_NAME if zk_count else '' - }, - 'YARN': { - 'hdfs_service': self.HDFS_SERVICE_NAME, - 'zookeeper_service': - self.ZOOKEEPER_SERVICE_NAME if zk_count else '' - }, - 'HUE': { - 'hive_service': self.HIVE_SERVICE_NAME, - 'oozie_service': self.OOZIE_SERVICE_NAME, - 'zookeeper_service': - self.ZOOKEEPER_SERVICE_NAME if zk_count else '' - }, - 'SPARK_ON_YARN': { - 'yarn_service': self.YARN_SERVICE_NAME - }, - 'HBASE': { - 'hdfs_service': self.HDFS_SERVICE_NAME, - 'zookeeper_service': self.ZOOKEEPER_SERVICE_NAME - } - } - hive_confs = { - 'HIVE': { - 'hive_metastore_database_type': 'postgresql', - 'hive_metastore_database_host': - self.pu.get_manager(cluster).internal_ip, - 'hive_metastore_database_port': '7432', - 'hive_metastore_database_password': - self.pu.db_helper.get_hive_db_password(cluster) - } - } - hue_confs = { - 'HUE': { - 'hue_webhdfs': - self.pu.get_role_name(self.pu.get_namenode(cluster), - 'NAMENODE') - } - } - - all_confs = s_cfg.merge_configs(all_confs, hue_confs) - all_confs = s_cfg.merge_configs(all_confs, hive_confs) - all_confs = s_cfg.merge_configs(all_confs, cluster.cluster_configs) - - if instance: - paths = instance.storage_paths() - - instance_default_confs = { - 'NAMENODE': { - 'dfs_name_dir_list': get_hadoop_dirs(paths, '/fs/nn') - }, - 'SECONDARYNAMENODE': { - 'fs_checkpoint_dir_list': - get_hadoop_dirs(paths, '/fs/snn') - }, - 'DATANODE': { - 'dfs_data_dir_list': get_hadoop_dirs(paths, '/fs/dn'), - 'dfs_datanode_data_dir_perm': 755, - 'dfs_datanode_handler_count': 30 - }, - 'NODEMANAGER': { - 
'yarn_nodemanager_local_dirs': - get_hadoop_dirs(paths, '/yarn/local') - }, - 'SERVER': { - 'maxSessionTimeout': 60000 - } - } - - ng_user_confs = self.pu.convert_process_configs( - instance.node_group.node_configs) - all_confs = s_cfg.merge_configs(all_confs, ng_user_confs) - all_confs = s_cfg.merge_configs(all_confs, instance_default_confs) - - return all_confs.get(service, {}) diff --git a/sahara/plugins/cdh/v5/config_helper.py b/sahara/plugins/cdh/v5/config_helper.py deleted file mode 100644 index 8c395161..00000000 --- a/sahara/plugins/cdh/v5/config_helper.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2014 Mirantis Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sahara.plugins.cdh import config_helper as c_h - - -class ConfigHelperV5(c_h.ConfigHelper): - path_to_config = 'plugins/cdh/v5/resources/' - - CDH5_UBUNTU_REPO = ( - 'deb [arch=amd64] http://archive.cloudera.com/cdh5' - '/ubuntu/precise/amd64/cdh precise-cdh5.0.0 contrib' - '\ndeb-src http://archive.cloudera.com/cdh5/ubuntu' - '/precise/amd64/cdh precise-cdh5.0.0 contrib') - - DEFAULT_CDH5_UBUNTU_REPO_KEY_URL = ( - 'http://archive.cloudera.com/cdh5/ubuntu' - '/precise/amd64/cdh/archive.key') - - CM5_UBUNTU_REPO = ( - 'deb [arch=amd64] http://archive.cloudera.com/cm5' - '/ubuntu/precise/amd64/cm precise-cm5.0.0 contrib' - '\ndeb-src http://archive.cloudera.com/cm5/ubuntu' - '/precise/amd64/cm precise-cm5.0.0 contrib') - - DEFAULT_CM5_UBUNTU_REPO_KEY_URL = ( - 'http://archive.cloudera.com/cm5/ubuntu' - '/precise/amd64/cm/archive.key') - - CDH5_CENTOS_REPO = ( - '[cloudera-cdh5]' - '\nname=Cloudera\'s Distribution for Hadoop, Version 5' - '\nbaseurl=http://archive.cloudera.com/cdh5/redhat/6' - '/x86_64/cdh/5.0.0/' - '\ngpgkey = http://archive.cloudera.com/cdh5/redhat/6' - '/x86_64/cdh/RPM-GPG-KEY-cloudera' - '\ngpgcheck = 1') - - CM5_CENTOS_REPO = ( - '[cloudera-manager]' - '\nname=Cloudera Manager' - '\nbaseurl=http://archive.cloudera.com/cm5/redhat/6' - '/x86_64/cm/5.0.0/' - '\ngpgkey = http://archive.cloudera.com/cm5/redhat/6' - '/x86_64/cm/RPM-GPG-KEY-cloudera' - '\ngpgcheck = 1') - - def __init__(self): - super(ConfigHelperV5, self).__init__() - self.priority_one_confs = self._load_json( - self.path_to_config + 'priority-one-confs.json') - self._load_ng_plugin_configs() - - def _load_ng_plugin_configs(self): - self.hdfs_confs = self._load_and_init_configs( - 'hdfs-service.json', 'HDFS', 'cluster') - self.namenode_confs = self._load_and_init_configs( - 'hdfs-namenode.json', 'NAMENODE', 'node') - self.datanode_confs = self._load_and_init_configs( - 'hdfs-datanode.json', 'DATANODE', 'node') - self.secnamenode_confs = self._load_and_init_configs( - 'hdfs-secondarynamenode.json', 'SECONDARYNAMENODE', 'node') - self.hdfs_gateway_confs = self._load_and_init_configs( - 'hdfs-gateway.json', 'HDFS_GATEWAY', 'node') - self.yarn_confs = self._load_and_init_configs( - 'yarn-service.json', 'YARN', 'cluster') - self.resourcemanager_confs = self._load_and_init_configs( - 'yarn-resourcemanager.json', 'YARN_GATEWAY', 'node') 
- self.nodemanager_confs = self._load_and_init_configs( - 'yarn-nodemanager.json', 'RESOURCEMANAGER', 'node') - self.jobhistory_confs = self._load_and_init_configs( - 'yarn-jobhistory.json', 'NODEMANAGER', 'node') - self.yarn_gateway = self._load_and_init_configs( - 'yarn-gateway.json', 'JOBHISTORY', 'node') - self.oozie_service_confs = self._load_and_init_configs( - 'oozie-service.json', 'OOZIE', 'cluster') - self.oozie_role_confs = self._load_and_init_configs( - 'oozie-oozie.json', 'OOZIE', 'node') - self.hive_service_confs = self._load_and_init_configs( - 'hive-service.json', 'HIVE', 'cluster') - self.hive_metastore_confs = self._load_and_init_configs( - 'hive-metastore.json', 'HIVEMETASTORE', 'node') - self.hive_hiveserver_confs = self._load_and_init_configs( - 'hive-hiveserver2.json', 'HIVESERVER', 'node') - self.hive_webhcat_confs = self._load_and_init_configs( - 'hive-webhcat.json', 'WEBHCAT', 'node') - self.hue_service_confs = self._load_and_init_configs( - 'hue-service.json', 'HUE', 'cluster') - self.hue_role_confs = self._load_and_init_configs( - 'hue-hue.json', 'HUE', 'node') - self.spark_service_confs = self._load_and_init_configs( - 'spark-service.json', 'SPARK_ON_YARN', 'cluster') - self.spark_role_confs = self._load_and_init_configs( - 'spark-history.json', 'SPARK_ON_YARN', 'node') - self.zookeeper_server_confs = self._load_and_init_configs( - 'zookeeper-server.json', 'ZOOKEEPER', 'cluster') - self.zookeeper_service_confs = self._load_and_init_configs( - 'zookeeper-service.json', 'ZOOKEEPER', 'node') - self.hbase_confs = self._load_and_init_configs( - 'hbase-service.json', 'HBASE', 'cluster') - self.master_confs = self._load_and_init_configs( - 'hbase-master.json', 'MASTER', 'node') - self.regionserver_confs = self._load_and_init_configs( - 'hbase-regionserver.json', 'REGIONSERVER', 'node') diff --git a/sahara/plugins/cdh/v5/db_helper.py b/sahara/plugins/cdh/v5/db_helper.py deleted file mode 100644 index c892fd7f..00000000 --- a/sahara/plugins/cdh/v5/db_helper.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2014 Mirantis Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sahara.plugins.cdh import db_helper as dh -from sahara.utils import files - - -def get_hive_db_password(cluster): - return dh.get_password_from_db(cluster, dh.HIVE_DB_PASSWORD) - - -def create_hive_database(cluster, remote): - db_password = get_hive_db_password(cluster) - create_db_script = files.get_file_text( - 'plugins/cdh/v5/resources/create_hive_db.sql') - create_db_script = create_db_script % db_password - dh.remote_execute_db_script(remote, create_db_script) diff --git a/sahara/plugins/cdh/v5/deploy.py b/sahara/plugins/cdh/v5/deploy.py deleted file mode 100644 index b94938a8..00000000 --- a/sahara/plugins/cdh/v5/deploy.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) 2014 Mirantis Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sahara.i18n import _ -from sahara.plugins.cdh import commands as cmd -from sahara.plugins.cdh.v5 import cloudera_utils as cu -from sahara.plugins import utils as gu -from sahara.service.edp import hdfs_helper as h -from sahara.utils import cluster_progress_ops as cpo - - -def _step_description(x): - return { - 'step': gu.start_process_event_message(x), - 'param': ('cluster', 0) - } - - -PACKAGES = [ - 'cloudera-manager-agent', - 'cloudera-manager-daemons', - 'cloudera-manager-server', - 'cloudera-manager-server-db-2', - 'hadoop-hdfs-datanode', - 'hadoop-hdfs-namenode', - 'hadoop-hdfs-secondarynamenode', - 'hadoop-mapreduce', - 'hadoop-mapreduce-historyserver', - 'hadoop-yarn-nodemanager', - 'hadoop-yarn-resourcemanager', - 'hbase', - 'hive-hcatalog', - 'hive-metastore', - 'hive-server2', - 'hive-webhcat-server', - 'hue', - 'oozie', - 'oracle-j2sdk1.7', - 'spark-core', - 'unzip', - 'zookeeper' -] - -CU = cu.ClouderaUtilsV5() - - -def configure_cluster(cluster): - instances = gu.get_instances(cluster) - - if not cmd.is_pre_installed_cdh(CU.pu.get_manager(cluster).remote()): - CU.pu.configure_os(instances) - CU.pu.install_packages(instances, PACKAGES) - - CU.pu.start_cloudera_agents(instances) - CU.pu.start_cloudera_manager(cluster) - CU.update_cloudera_password(cluster) - CU.await_agents(cluster, instances) - CU.create_mgmt_service(cluster) - CU.create_services(cluster) - CU.configure_services(cluster) - CU.configure_instances(instances, cluster) - CU.deploy_configs(cluster) - CU.pu.configure_swift(cluster) - - -@cpo.event_wrapper( - True, step=_("Start roles: NODEMANAGER, DATANODE"), param=('cluster', 0)) -def _start_roles(cluster, instances): - for instance in instances: - if 'HDFS_DATANODE' in instance.node_group.node_processes: - hdfs = CU.get_service_by_role('DATANODE', instance=instance) - CU.start_roles(hdfs, CU.pu.get_role_name(instance, 'DATANODE')) - - if 'YARN_NODEMANAGER' in instance.node_group.node_processes: - yarn = CU.get_service_by_role('NODEMANAGER', instance=instance) - CU.start_roles(yarn, CU.pu.get_role_name(instance, 'NODEMANAGER')) - - -def scale_cluster(cluster, instances): - if not instances: - return - - if not cmd.is_pre_installed_cdh(instances[0].remote()): - CU.pu.configure_os(instances) - CU.pu.install_packages(instances, PACKAGES) - - CU.pu.start_cloudera_agents(instances) - CU.await_agents(cluster, instances) - CU.configure_instances(instances) - CU.pu.configure_swift(cluster, instances) - CU.update_configs(instances) - CU.refresh_datanodes(cluster) - _start_roles(cluster, instances) - - -def decommission_cluster(cluster, instances): - dns = [] - nms = [] - for i in instances: - if 'HDFS_DATANODE' in i.node_group.node_processes: - dns.append(CU.pu.get_role_name(i, 'DATANODE')) - if 'YARN_NODEMANAGER' in i.node_group.node_processes: - nms.append(CU.pu.get_role_name(i, 'NODEMANAGER')) - - if dns: - CU.decommission_nodes(cluster, 'DATANODE', dns) - - if nms: - CU.decommission_nodes(cluster, 'NODEMANAGER', nms) - - CU.delete_instances(cluster, instances) - - CU.refresh_datanodes(cluster) - CU.refresh_yarn_nodes(cluster) - - 
-@cpo.event_wrapper(True, **_step_description("Zookeeper")) -def start_zookeeper(cluster, cm_cluster): - # Cluster cannot be removed from args list, because it used inside - # event wrapper - zookeeper = cm_cluster.get_service(CU.ZOOKEEPER_SERVICE_NAME) - CU.start_service(zookeeper) - - -@cpo.event_wrapper(True, **_step_description("HDFS")) -def start_hdfs(cluster, cm_cluster): - # Cluster cannot be removed from args list, because it used inside - # event wrapper - hdfs = cm_cluster.get_service(CU.HDFS_SERVICE_NAME) - CU.format_namenode(hdfs) - CU.start_service(hdfs) - CU.create_hdfs_tmp(hdfs) - - -@cpo.event_wrapper(True, **_step_description("YARN")) -def start_yarn(cluster, cm_cluster): - # Cluster cannot be removed from args list, because it used inside - # event wrapper - yarn = cm_cluster.get_service(CU.YARN_SERVICE_NAME) - CU.create_yarn_job_history_dir(yarn) - CU.start_service(yarn) - - -@cpo.event_wrapper(True, **_step_description("Hive")) -def start_hive(cluster, cm_cluster): - hive = cm_cluster.get_service(CU.HIVE_SERVICE_NAME) - CU.pu.put_hive_hdfs_xml(cluster) - CU.pu.configure_hive(cluster) - CU.pu.create_hive_hive_directory(cluster) - CU.create_hive_metastore_db(hive) - CU.create_hive_dirs(hive) - CU.start_service(hive) - - -@cpo.event_wrapper(True, **_step_description("Oozie")) -def start_oozie(cluster, cm_cluster): - CU.pu.install_extjs(cluster) - oozie = cm_cluster.get_service(CU.OOZIE_SERVICE_NAME) - CU.create_oozie_db(oozie) - CU.install_oozie_sharelib(oozie) - CU.start_service(oozie) - - -@cpo.event_wrapper(True, **_step_description("Hue")) -def start_hue(cluster, cm_cluster): - # Cluster cannot be removed from args list, because it used inside - # event wrapper - hue = cm_cluster.get_service(CU.HUE_SERVICE_NAME) - CU.start_service(hue) - - -@cpo.event_wrapper(True, **_step_description("Spark")) -def start_spark_historyserver(cluster, cm_cluster): - CU.pu.configure_spark(cluster) - spark = cm_cluster.get_service(CU.SPARK_SERVICE_NAME) - CU.start_service(spark) - - -@cpo.event_wrapper(True, **_step_description("HBase master")) -def start_hbase_master(cluster, cm_cluster): - # Cluster cannot be removed from args list, because it used inside - # event wrapper - hbase = cm_cluster.get_service(CU.HBASE_SERVICE_NAME) - CU.create_hbase_root(hbase) - CU.start_service(hbase) - - -def start_cluster(cluster): - cm_cluster = CU.get_cloudera_cluster(cluster) - - if len(CU.pu.get_zookeepers(cluster)) > 0: - start_zookeeper(cluster, cm_cluster) - - start_hdfs(cluster, cm_cluster) - start_yarn(cluster, cm_cluster) - - if CU.pu.get_hive_metastore(cluster): - start_hive(cluster, cm_cluster) - - if CU.pu.get_oozie(cluster): - start_oozie(cluster, cm_cluster) - - if CU.pu.get_hue(cluster): - start_hue(cluster, cm_cluster) - - if CU.pu.get_spark_historyserver(cluster): - start_spark_historyserver(cluster, cm_cluster) - - if CU.pu.get_hbase_master(cluster): - start_hbase_master(cluster, cm_cluster) - - create_hbase_common_lib(cluster) - - -@cpo.event_wrapper( - True, step=_("Create HBase common lib"), param=('cluster', 0)) -def create_hbase_common_lib(cluster): - server = CU.pu.get_hbase_master(cluster) - if CU.pu.c_helper.is_hbase_common_lib_enabled(cluster) and server: - with server.remote() as r: - h.create_hbase_common_lib(r) - - -def get_open_ports(node_group): - ports = [9000] # for CM agent - - ports_map = { - 'CLOUDERA_MANAGER': [7180, 7182, 7183, 7432, 7184, 8084, 8086, 10101, - 9997, 9996, 8087, 9998, 9999, 8085, 9995, 9994], - 'HDFS_NAMENODE': [8020, 8022, 50070, 50470], - 
'HDFS_SECONDARYNAMENODE': [50090, 50495], - 'HDFS_DATANODE': [50010, 1004, 50075, 1006, 50020], - 'YARN_RESOURCEMANAGER': [8030, 8031, 8032, 8033, 8088], - 'YARN_NODEMANAGER': [8040, 8041, 8042], - 'YARN_JOBHISTORY': [10020, 19888], - 'HIVE_METASTORE': [9083], - 'HIVE_SERVER2': [10000], - 'HUE_SERVER': [8888], - 'OOZIE_SERVER': [11000, 11001], - 'SPARK_YARN_HISTORY_SERVER': [18088], - 'ZOOKEEPER_SERVER': [2181, 3181, 4181, 9010], - 'HBASE_MASTER': [60000], - 'HBASE_REGIONSERVER': [60020] - } - - for process in node_group.node_processes: - if process in ports_map: - ports.extend(ports_map[process]) - - return ports diff --git a/sahara/plugins/cdh/v5/edp_engine.py b/sahara/plugins/cdh/v5/edp_engine.py deleted file mode 100644 index 8e5b949f..00000000 --- a/sahara/plugins/cdh/v5/edp_engine.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2014 Mirantis Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sahara.plugins.cdh import confighints_helper as ch_helper -from sahara.plugins.cdh import edp_engine -from sahara.plugins.cdh.v5 import cloudera_utils as cu -from sahara.service.edp.oozie import engine as oozie_engine -from sahara.utils import edp - - -class EdpOozieEngine(edp_engine.EdpOozieEngine): - - def __init__(self, cluster): - super(EdpOozieEngine, self).__init__(cluster) - self.cloudera_utils = cu.ClouderaUtilsV5() - - @staticmethod - def get_possible_job_config(job_type): - if edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE): - return {'job_config': ch_helper.get_possible_hive_config_from( - 'plugins/cdh/v5/resources/hive-site.xml')} - if edp.compare_job_type(job_type, - edp.JOB_TYPE_MAPREDUCE, - edp.JOB_TYPE_MAPREDUCE_STREAMING): - return {'job_config': ch_helper.get_possible_mapreduce_config_from( - 'plugins/cdh/v5/resources/mapred-site.xml')} - if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG): - return {'job_config': ch_helper.get_possible_pig_config_from( - 'plugins/cdh/v5/resources/mapred-site.xml')} - return oozie_engine.OozieJobEngine.get_possible_job_config(job_type) diff --git a/sahara/plugins/cdh/v5/plugin_utils.py b/sahara/plugins/cdh/v5/plugin_utils.py deleted file mode 100644 index 82a03e0d..00000000 --- a/sahara/plugins/cdh/v5/plugin_utils.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2014 Intel Corporation. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from sahara.plugins.cdh import plugin_utils as pu -from sahara.plugins.cdh.v5 import config_helper -from sahara.plugins.cdh.v5 import db_helper - - -class PluginUtilsV5(pu.AbstractPluginUtils): - - def __init__(self): - self.c_helper = config_helper.ConfigHelperV5() - self.db_helper = db_helper - - def configure_spark(self, cluster): - spark = self.get_spark_historyserver(cluster) - with spark.remote() as r: - r.execute_command( - 'sudo su - -c "hdfs dfs -mkdir -p ' - '/user/spark/applicationHistory" hdfs') - r.execute_command( - 'sudo su - -c "hdfs dfs -mkdir -p ' - '/user/spark/share/lib" hdfs') - r.execute_command( - 'sudo su - -c "hdfs dfs -put /usr/lib/spark/assembly/lib/' - 'spark-assembly-hadoop* ' - '/user/spark/share/lib/spark-assembly.jar" hdfs') - r.execute_command( - 'sudo su - -c "hdfs dfs -chown -R ' - 'spark:spark /user/spark" hdfs') - r.execute_command( - 'sudo su - -c "hdfs dfs -chmod 0751 /user/spark" hdfs') - r.execute_command( - 'sudo su - -c "hdfs dfs -chmod 1777 /user/spark/' - 'applicationHistory" hdfs') - - def create_hive_hive_directory(self, cluster): - # Hive requires /tmp/hive-hive directory - namenode = self.get_namenode(cluster) - with namenode.remote() as r: - r.execute_command( - 'sudo su - -c "hadoop fs -mkdir -p /tmp/hive-hive" hdfs') - r.execute_command( - 'sudo su - -c "hadoop fs -chown hive /tmp/hive-hive" hdfs') diff --git a/sahara/plugins/cdh/v5/resources/cdh_config.py b/sahara/plugins/cdh/v5/resources/cdh_config.py deleted file mode 100644 index 9a9cd676..00000000 --- a/sahara/plugins/cdh/v5/resources/cdh_config.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2014 Mirantis Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from oslo_serialization import jsonutils as json -import six - -from sahara.plugins.cdh.client import api_client - - -# -- cm config -- - -cm_address = 'localhost' -cm_port = 7180 -cm_username = 'admin' -cm_password = 'admin' - -hdfs_service_name = 'hdfs01' -yarn_service_name = 'yarn01' -oozie_service_name = 'oozie01' -hive_service_name = 'hive01' -hue_service_name = 'hue01' -spark_service_name = 'spark_on_yarn01' -zookeeper_service_name = 'zookeeper01' -hbase_service_name = 'hbase01' - - -def get_cm_api(): - return api_client.ApiResource(cm_address, server_port=cm_port, - username=cm_username, password=cm_password) - - -def get_cluster(api): - return api.get_all_clusters()[0] - - -def process_service(service, service_name): - for role_cfgs in service.get_all_role_config_groups(): - role_cm_cfg = role_cfgs.get_config(view='full') - role_cfg = parse_config(role_cm_cfg) - role_name = role_cfgs.displayName.split(' ')[0].lower() - write_cfg(role_cfg, '%s-%s.json' % (service_name, role_name)) - - service_cm_cfg = service.get_config(view='full')[0] - service_cfg = parse_config(service_cm_cfg) - write_cfg(service_cfg, '%s-service.json' % service_name) - - -def parse_config(config): - cfg = [] - for name, value in six.iteritems(config): - p = { - 'name': value.name, - 'value': value.default, - 'display_name': value.displayName, - 'desc': value.description - } - cfg.append(p) - - return cfg - - -def write_cfg(cfg, file_name): - to_write = json.dumps(cfg, sort_keys=True, indent=4, - separators=(',', ': ')) - - with open(file_name, 'w') as f: - f.write(to_write) - - -def main(): - client = get_cm_api() - cluster = get_cluster(client) - - hdfs = cluster.get_service(hdfs_service_name) - process_service(hdfs, 'hdfs') - - yarn = cluster.get_service(yarn_service_name) - process_service(yarn, 'yarn') - - oozie = cluster.get_service(oozie_service_name) - process_service(oozie, 'oozie') - - hive = cluster.get_service(hive_service_name) - process_service(hive, 'hive') - - hue = cluster.get_service(hue_service_name) - process_service(hue, 'hue') - - spark = cluster.get_service(spark_service_name) - process_service(spark, 'spark') - - zookeeper = cluster.get_service(zookeeper_service_name) - process_service(zookeeper, 'zookeeper') - - hbase = cluster.get_service(hbase_service_name) - process_service(hbase, 'hbase') - -if __name__ == '__main__': - main() diff --git a/sahara/plugins/cdh/v5/resources/cdh_config.sh b/sahara/plugins/cdh/v5/resources/cdh_config.sh deleted file mode 100755 index 4cfb7b7a..00000000 --- a/sahara/plugins/cdh/v5/resources/cdh_config.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -tox -evenv -- python $(dirname $0)/cdh_config.py $* diff --git a/sahara/plugins/cdh/v5/resources/create_hive_db.sql b/sahara/plugins/cdh/v5/resources/create_hive_db.sql deleted file mode 100644 index 7545eeb7..00000000 --- a/sahara/plugins/cdh/v5/resources/create_hive_db.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE ROLE hive LOGIN PASSWORD '%s'; -CREATE DATABASE metastore OWNER hive encoding 'UTF8'; -GRANT ALL PRIVILEGES ON DATABASE metastore TO hive; -COMMIT; diff --git a/sahara/plugins/cdh/v5/resources/hbase-master.json b/sahara/plugins/cdh/v5/resources/hbase-master.json deleted file mode 100644 index fcc20001..00000000 --- a/sahara/plugins/cdh/v5/resources/hbase-master.json +++ /dev/null @@ -1,326 +0,0 @@ -[ - { - "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. 
This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.", - "display_name": "Log Directory Free Space Monitoring Percentage Thresholds", - "name": "log_directory_free_space_percentage_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"never\"}" - }, - { - "desc": "

This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.

Each rule has some or all of the following fields:


Example:{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}

This rule will send events to Cloudera Manager for every StringIndexOutOfBoundsException, up to a maximum of 10 every minute.

", - "display_name": "Rules to Extract Events from Log Files", - "name": "log_event_whitelist", - "value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"IPC Server handler.*ClosedChannelException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"IPC Server Responder, call.*output error\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Daughter regiondir does not exist: .*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"File.*might still be open.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"File.*might still be open.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Moving table .+ state to enabled but was already enabled\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Received OPENED for region.*but region was in the state.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. 
Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n" - }, - { - "desc": "The amount of time allowed after this role is started that failures of health checks that rely on communication with this role will be tolerated.", - "display_name": "Health Check Startup Tolerance", - "name": "master_startup_tolerance", - "value": "5" - }, - { - "desc": "Whether or not periodic stacks collection is enabled.", - "display_name": "Stacks Collection Enabled", - "name": "stacks_collection_enabled", - "value": "false" - }, - { - "desc": "Number of pooled threads to handle region closing in the master.", - "display_name": "Region Closing Threads", - "name": "hbase_master_executor_closeregion_threads", - "value": "5" - }, - { - "desc": "When computing the overall Master health, consider the host's health.", - "display_name": "Master Host Health Test", - "name": "master_host_health_enabled", - "value": "true" - }, - { - "desc": "Number of pooled threads to handle the recovery of the region servers in the master.", - "display_name": "RegionServer Recovery Threads", - "name": "hbase_master_executor_serverops_threads", - "value": "5" - }, - { - "desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.", - "display_name": "Cgroup I/O Weight", - "name": "rm_io_weight", - "value": "500" - }, - { - "desc": "Directory where Master will place its log files.", - "display_name": "Master Log Directory", - "name": "hbase_master_log_dir", - "value": "/var/log/hbase" - }, - { - "desc": "The health test thresholds on the duration of the metrics request to the web server.", - "display_name": "Web Metric Collection Duration", - "name": "master_web_metric_collection_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}" - }, - { - "desc": "Time period in seconds to reset long-running metrics (e.g. compactions). This is an HBase specific configuration.", - "display_name": "Extended Period", - "name": "hbase_metrics_extended_period", - "value": "3600" - }, - { - "desc": "For advanced use only, a string to be inserted into log4j.properties for this role only.", - "display_name": "Master Logging Advanced Configuration Snippet (Safety Valve)", - "name": "log4j_safety_valve", - "value": null - }, - { - "desc": "The method that will be used to collect stacks. The jstack option involves periodically running the jstack command against the role's daemon process. The servlet method is available for those roles that expose an HTTP server endpoint exposing the current stacks traces of all threads. When the servlet method is selected that HTTP endpoint is periodically scraped.", - "display_name": "Stacks Collection Method", - "name": "stacks_collection_method", - "value": "jstack" - }, - { - "desc": "The health test thresholds of the number of file descriptors used. 
Specified as a percentage of file descriptor limit.", - "display_name": "File Descriptor Monitoring Thresholds", - "name": "master_fd_thresholds", - "value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}" - }, - { - "desc": "The port that the HBase Master binds to.", - "display_name": "HBase Master Port", - "name": "hbase_master_port", - "value": "60000" - }, - { - "desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.", - "display_name": "Web Metric Collection", - "name": "master_web_metric_collection_enabled", - "value": "true" - }, - { - "desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.", - "display_name": "Automatically Restart Process", - "name": "process_auto_restart", - "value": "false" - }, - { - "desc": "List of org.apache.hadoop.hbase.coprocessor.MasterObserver coprocessors that are loaded by default on the active HMaster process. For any implemented coprocessor methods, the listed classes will be called in order. After implementing your own MasterObserver, just put it in HBase's classpath and add the fully qualified class name here.", - "display_name": "HBase Coprocessor Master Classes", - "name": "hbase_coprocessor_master_classes", - "value": "" - }, - { - "desc": "

The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.

Each trigger has all of the following fields:

For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:

[{\"triggerName\": \"sample-trigger\",\n  \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n  \"streamThreshold\": 0, \"enabled\": \"true\"}]

Consult the trigger rules documentation for more details on how to write triggers using tsquery.

The JSON format is evolving and may change in the future and as a result backward compatibility is not guaranteed between releases at this time.

", - "display_name": "Role Triggers", - "name": "role_triggers", - "value": "[]" - }, - { - "desc": "Maximum size in bytes for the Java Process heap memory. Passed to Java -Xmx.", - "display_name": "Java Heap Size of HBase Master in Bytes", - "name": "hbase_master_java_heapsize", - "value": "1073741824" - }, - { - "desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.", - "display_name": "Cgroup Memory Soft Limit", - "name": "rm_memory_soft_limit", - "value": "-1" - }, - { - "desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.", - "display_name": "Garbage Collection Duration Thresholds", - "name": "master_gc_duration_thresholds", - "value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}" - }, - { - "desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.", - "display_name": "Cgroup Memory Hard Limit", - "name": "rm_memory_hard_limit", - "value": "-1" - }, - { - "desc": "When true, HBase Master will bind to 0.0.0.0. Only available with CDH 4.3 and later.", - "display_name": "HBase Master Bind to Wildcard Address", - "name": "hbase_master_bind_to_wildcard_address", - "value": "true" - }, - { - "desc": "Enables the health test that a client can connect to the HBase Master", - "display_name": "HBase Master Canary Health Test", - "name": "master_canary_health_enabled", - "value": "true" - }, - { - "desc": "The period to review when computing unexpected exits.", - "display_name": "Unexpected Exits Monitoring Period", - "name": "unexpected_exits_window", - "value": "5" - }, - { - "desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into hadoop-metrics2.properties.", - "display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)", - "name": "hadoop_metrics2_safety_valve", - "value": null - }, - { - "desc": "The amount of stacks data that will be retained. After the retention limit is hit, the oldest data will be deleted.", - "display_name": "Stacks Collection Data Retention", - "name": "stacks_collection_data_retention", - "value": "104857600" - }, - { - "desc": "The port for the HBase Master web UI. 
Set to -1 to disable the HBase Master web UI.", - "display_name": "HBase Master Web UI Port", - "name": "hbase_master_info_port", - "value": "60010" - }, - { - "desc": "The period to review when computing the moving average of garbage collection time.", - "display_name": "Garbage Collection Duration Monitoring Period", - "name": "master_gc_duration_window", - "value": "5" - }, - { - "desc": "For advanced use only, a string to be inserted into hbase-site.xml for this role only.", - "display_name": "Master Advanced Configuration Snippet (Safety Valve) for hbase-site.xml", - "name": "hbase_master_config_safety_valve", - "value": null - }, - { - "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.", - "display_name": "Log Directory Free Space Monitoring Absolute Thresholds", - "name": "log_directory_free_space_absolute_thresholds", - "value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}" - }, - { - "desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.", - "display_name": "Maximum Process File Descriptors", - "name": "rlimit_fds", - "value": null - }, - { - "desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.", - "display_name": "Java Configuration Options for HBase Master", - "name": "hbase_master_java_opts", - "value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled" - }, - { - "desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold", - "display_name": "Enable Health Alerts for this Role", - "name": "enable_alerts", - "value": "true" - }, - { - "desc": "The maximum size, in megabytes, per log file for Master logs. 
Typically used by log4j.", - "display_name": "Master Max Log Size", - "name": "max_log_size", - "value": "200" - }, - { - "desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.", - "display_name": "Kill When Out of Memory", - "name": "oom_sigkill_enabled", - "value": "true" - }, - { - "desc": "Maximum time an HLog remains in the .oldlogdir directory until an HBase Master thread deletes it.", - "display_name": "Maximum Time to Keep HLogs", - "name": "hbase_master_logcleaner_ttl", - "value": "60000" - }, - { - "desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.", - "display_name": "Dump Heap When Out of Memory", - "name": "oom_heap_dump_enabled", - "value": "false" - }, - { - "desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.", - "display_name": "Unexpected Exits Thresholds", - "name": "unexpected_exits_thresholds", - "value": "{\"critical\":\"any\",\"warning\":\"never\"}" - }, - { - "desc": "Number of RPC Server instances spun up on HBase Master.", - "display_name": "HBase Master Handler Count", - "name": "hbase_master_handler_count", - "value": "25" - }, - { - "desc": "The address for the HBase Master web UI", - "display_name": "HBase Master Web UI Address", - "name": "hbase_master_info_bindAddress", - "value": null - }, - { - "desc": "A comma-separated list of LogCleanerDelegate(s) that are used in LogsCleaner. WAL/HLog cleaner(s) are called in order, so put the log cleaner that prunes the most log files in the front. To implement your own LogCleanerDelegate, add it to HBase's classpath and add the fully-qualified class name here. You should always add the above default log cleaners in the list, unless you have a special reason not to.", - "display_name": "HBase Master Log Cleaner Plugins", - "name": "hbase_master_logcleaner_plugins", - "value": null - }, - { - "desc": "The directory in which stacks logs will be placed. If not set, stacks will be logged into a stackssubdirectory of the role's log directory.", - "display_name": "Stacks Collection Directory", - "name": "stacks_collection_directory", - "value": null - }, - { - "desc": "The maximum number of rolled log files to keep for Master logs. Typically used by log4j.", - "display_name": "Master Maximum Log File Backups", - "name": "max_log_backup_index", - "value": "10" - }, - { - "desc": "The host name or IP address of the DNS name server which an HBase Master should use to determine the host name used for communication and display purposes.", - "display_name": "HBase Master DNS Name Server", - "name": "hbase_master_dns_nameserver", - "value": null - }, - { - "desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.", - "display_name": "Cgroup CPU Shares", - "name": "rm_cpu_shares", - "value": "1024" - }, - { - "desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. 
Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.", - "display_name": "Heap Dump Directory", - "name": "oom_heap_dump_dir", - "value": "/tmp" - }, - { - "desc": "Number of pooled threads to handle region opening in the master.", - "display_name": "Region Opening Threads", - "name": "hbase_master_executor_openregion_threads", - "value": "5" - }, - { - "desc": "The frequency with which stacks will be collected.", - "display_name": "Stacks Collection Frequency", - "name": "stacks_collection_frequency", - "value": "5.0" - }, - { - "desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.", - "display_name": "Enable Configuration Change Alerts", - "name": "enable_config_alerts", - "value": "false" - }, - { - "desc": "The name of the DNS network interface from which an HBase Master should report its IP address.", - "display_name": "HBase Master DNS Network Interface", - "name": "hbase_master_dns_interface", - "value": null - }, - { - "desc": "The minimum log level for Master logs", - "display_name": "Master Logging Threshold", - "name": "log_threshold", - "value": "INFO" - }, - { - "desc": "Enables the health test that the Master's process state is consistent with the role configuration", - "display_name": "Master Process Health Test", - "name": "master_scm_health_enabled", - "value": "true" - } -] \ No newline at end of file diff --git a/sahara/plugins/cdh/v5/resources/hbase-regionserver.json b/sahara/plugins/cdh/v5/resources/hbase-regionserver.json deleted file mode 100644 index b6a5f94d..00000000 --- a/sahara/plugins/cdh/v5/resources/hbase-regionserver.json +++ /dev/null @@ -1,542 +0,0 @@ -[ - { - "desc": "For advanced use only, a string to be inserted into hbase-site.xml for this role only.", - "display_name": "RegionServer Advanced Configuration Snippet (Safety Valve) for hbase-site.xml", - "name": "hbase_regionserver_config_safety_valve", - "value": null - }, - { - "desc": "Whether or not periodic stacks collection is enabled.", - "display_name": "Stacks Collection Enabled", - "name": "stacks_collection_enabled", - "value": "false" - }, - { - "desc": "Maximum number of HStoreFiles to compact per minor compaction.", - "display_name": "Maximum Number of HStoreFiles Compaction", - "name": "hbase_hstore_compaction_max", - "value": null - }, - { - "desc": "The address for the HBase RegionServer web UI", - "display_name": "HBase RegionServer Web UI Address", - "name": "hbase_regionserver_info_bindAddress", - "value": null - }, - { - "desc": "The method that will be used to collect stacks. The jstack option involves periodically running the jstack command against the role's daemon process. The servlet method is available for those roles that expose an HTTP server endpoint exposing the current stacks traces of all threads. When the servlet method is selected that HTTP endpoint is periodically scraped.", - "display_name": "Stacks Collection Method", - "name": "stacks_collection_method", - "value": "jstack" - }, - { - "desc": "Number of threads to use while loading and unloading regions to or from a RegionServer. 
Can be used to increase the speed of decommissioning or rolling restart operations.", - "display_name": "Region Mover Threads", - "name": "hbase_regionserver_regionmover_thread_count", - "value": "1" - }, - { - "desc": "The health test thresholds of the average size of the HBase RegionServer flush queue over a recent period. See HBase RegionServer Flush Queue Monitoring Period.", - "display_name": "HBase RegionServer Flush Queue Monitoring Thresholds", - "name": "regionserver_flush_queue_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"10.0\"}" - }, - { - "desc": "The amount of time to wait for the HBase Region Server to fully start up and connect to the HBase Master before enforcing the connectivity check.", - "display_name": "HBase Region Server Connectivity Tolerance at Startup", - "name": "regionserver_connectivity_tolerance", - "value": "180" - }, - { - "desc": "The hashing algorithm for use in HashFunction. Two values are supported: 'murmur' (for MurmurHash) and 'jenkins' (for JenkinsHash).", - "display_name": "HBase Hash Type", - "name": "hbase_hash_type", - "value": "murmur" - }, - { - "desc": "The port for the HBase RegionServer web UI. Set to -1 to disable RegionServer web UI.", - "display_name": "HBase RegionServer Web UI port", - "name": "hbase_regionserver_info_port", - "value": "60030" - }, - { - "desc": "The period to review when computing unexpected exits.", - "display_name": "Unexpected Exits Monitoring Period", - "name": "unexpected_exits_window", - "value": "5" - }, - { - "desc": "When memstores are being forced to flush to make room in memory, keep flushing until this amount is reached. If this amount is equal to 'hbase.regionserver.global.memstore.upperLimit', then minimum possible flushing will occur when updates are blocked due to memstore limiting.", - "display_name": "Low Watermark for Memstore Flush", - "name": "hbase_regionserver_global_memstore_lowerLimit", - "value": "0.38" - }, - { - "desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.", - "display_name": "Java Configuration Options for HBase RegionServer", - "name": "hbase_regionserver_java_opts", - "value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled" - }, - { - "desc": "For advanced use only, a string to be inserted into log4j.properties for this role only.", - "display_name": "RegionServer Logging Advanced Configuration Snippet (Safety Valve)", - "name": "log4j_safety_valve", - "value": null - }, - { - "desc": "The health test thresholds of the latency that the RegionServer sees for HDFS read operations", - "display_name": "HBase RegionServer HDFS Read Latency Thresholds", - "name": "regionserver_read_latency_thresholds", - "value": "{\"critical\":\"100.0\",\"warning\":\"50.0\"}" - }, - { - "desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold", - "display_name": "Enable Health Alerts for this Role", - "name": "enable_alerts", - "value": "true" - }, - { - "desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.", - "display_name": "Dump Heap When Out of Memory", - "name": "oom_heap_dump_enabled", - "value": "false" - }, - { - "desc": "The health test thresholds of the size used by the HStoreFile index. 
Specified as a percentage of the total heap size.", - "display_name": "Percentage of Heap Used by HStoreFile Index", - "name": "regionserver_store_file_idx_size_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"10.0\"}" - }, - { - "desc": "Maximum number of Write-Ahead Log (WAL) files. This value multiplied by HDFS Block Size (dfs.blocksize) is the size of the WAL that will need to be replayed when a server crashes. This value is inversely proportional to the frequency of flushes to disk.", - "display_name": "Maximum number of Write-Ahead Log (WAL) files", - "name": "hbase_regionserver_maxlogs", - "value": "32" - }, - { - "desc": "List of coprocessors that are loaded by default on all tables. For any override coprocessor method, these classes will be called in order. After implementing your own coprocessor, just put it in HBase's classpath and add the fully qualified class name here. A coprocessor can also be loaded on demand by setting HTableDescriptor.", - "display_name": "HBase Coprocessor Region Classes", - "name": "hbase_coprocessor_region_classes", - "value": "" - }, - { - "desc": "The health test thresholds for the weighted average time spent in Java garbage collection. Specified as a percentage of elapsed wall clock time.", - "display_name": "Garbage Collection Duration Thresholds", - "name": "regionserver_gc_duration_thresholds", - "value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}" - }, - { - "desc": "Blocks writes if the size of the memstore increases to the value of 'hbase.hregion.block.memstore' multiplied by the value of 'hbase.hregion.flush.size' bytes. This setting is useful for preventing runaway memstore during spikes in update traffic. Without an upper-bound, memstore fills such that when it flushes, the resultant process of flushing files take a long time to compact or split, or worse, an \"out of memory\" error occurs.", - "display_name": "HBase Memstore Block Multiplier", - "name": "hbase_hregion_memstore_block_multiplier", - "value": "2" - }, - { - "desc": "Sync the HLog to HDFS after this interval, in milliseconds, if it has not accumulated the number of HLog Entries specified to trigger a sync.", - "display_name": "Synch Interval of HLog Entries", - "name": "hbase_regionserver_optionallogflushinterval", - "value": "1000" - }, - { - "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.", - "display_name": "Log Directory Free Space Monitoring Percentage Thresholds", - "name": "log_directory_free_space_percentage_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"never\"}" - }, - { - "desc": "

This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.

Each rule has some or all of the following fields:


Example:{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}

This rule will send events to Cloudera Manager for every StringIndexOutOfBoundsException, up to a maximum of 10 every minute.

", - "display_name": "Rules to Extract Events from Log Files", - "name": "log_event_whitelist", - "value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \"IPC Server handler.*took.*appending an edit to hlog.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"ABORTING region server serverName.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"DFSOutputStream ResponseProcessor exception.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Error Recovery for block blk.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Failed init\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Problem renewing lease for DFSClient.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"remote error telling master we are up\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\", \"content\": \"Session.*for server.*closing socket connection and attempting reconnect.\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. 
Use.*\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Starting .*compaction on region (.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_COMPACTION_REGION_STARTED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^completed compaction on region (.+) after (.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_COMPACTION_REGION_COMPLETED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\", \"group1\": \"DURATION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Starting compaction on (.+) in region (.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_COMPACTION_COLUMN_FAMILY_STARTED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"COLUMN_FAMILY\", \"group1\": \"REGION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^completed compaction: regionName=(.+), storeName=(.+), fileCount=(.+), fileSize=(.+), priority=(.+), time=(.+); duration=(.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_COMPACTION_COLUMN_FAMILY_COMPLETED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\", \"group1\": \"COLUMN_FAMILY\", \"group2\": \"FILE_COUNT\", \"group3\": \"FILE_SIZE\", \"group4\": \"PRIORITY\", \"group5\": \"REQUEST_TIME_NANOS\", \"group6\": \"DURATION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Completed compaction: Request = regionName=(.+), storeName=(.+), fileCount=(.+), fileSize=(.+), priority=(.+), time=(.+); duration=(.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_COMPACTION_COLUMN_FAMILY_COMPLETED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\", \"group1\": \"COLUMN_FAMILY\", \"group2\": \"FILE_COUNT\", \"group3\": \"FILE_SIZE\", \"group4\": \"PRIORITY\", \"group5\": \"REQUEST_TIME_NANOS\", \"group6\": \"DURATION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^aborted compaction: regionName=(.+), storeName=(.+), fileCount=(.+), fileSize=(.+), priority=(.+), time=(.+); duration=(.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_COMPACTION_COLUMN_FAMILY_ABORTED\", \"attribute:SEVERITY\": \"IMPORTANT\", \"group0\": \"REGION\", \"group1\": \"COLUMN_FAMILY\", \"group2\": \"FILE_COUNT\", \"group3\": \"FILE_SIZE\", \"group4\": \"PRIORITY\", \"group5\": \"REQUEST_TIME_NANOS\", \"group6\": \"DURATION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Finished memstore flush of .+ for region (.+) in (.+), sequenceid=(.+), compaction requested=(.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_FLUSH_COMPLETED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\", \"group1\": \"DURATION\", \"group2\": \"SEQUENCE_ID\", \"group3\": \"COMPACTION_REQUESTED\" },\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Flush of region (.+) due to global heap pressure$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_FLUSH_DUE_TO_HEAP_PRESSURE\", \"attribute:SEVERITY\": \"IMPORTANT\", \"group0\": \"REGION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"WARN\", \"content\": \"^Region (.+) has too many store files; delaying flush up to 90000ms$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": 
\"EV_HBASE_FLUSH_DELAYED_TOO_MANY_STORE_FILES\", \"attribute:SEVERITY\": \"CRITICAL\", \"group0\": \"REGION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Starting split of region (.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_SPLIT_STARTED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Running rollback/cleanup of failed split of (.+);.+$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_SPLIT_ABORTED\", \"attribute:SEVERITY\": \"IMPORTANT\", \"group0\": \"REGION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Region split, hbase:meta updated, and report to master. Parent=(.+), new regions: (.+,.*,.+), (.+,.*,.+). Split took (.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_SPLIT_COMPLETED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\", \"group1\": \"DAUGHTER_REGIONS\", \"group2\": \"DAUGHTER_REGIONS\", \"group3\": \"DURATION\"},\n {\"alert\": false, \"rate\": -1, \"threshold\":\"INFO\", \"content\": \"^Region split, META updated, and report to master. Parent=(.+), new regions: (.+,.*,.+), (.+,.*,.+). Split took (.+)$\", \"attribute:CATEGORY\": \"HBASE\", \"attribute:EVENTCODE\": \"EV_HBASE_SPLIT_COMPLETED\", \"attribute:SEVERITY\": \"INFORMATIONAL\", \"group0\": \"REGION\", \"group1\": \"DAUGHTER_REGIONS\", \"group2\": \"DAUGHTER_REGIONS\", \"group3\": \"DURATION\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}" - }, - { - "desc": "The host name or IP address of the DNS name server which a RegionServer should use to determine the host name used by the HBase Master for communication and display purposes.", - "display_name": "RegionServer DNS Nameserver", - "name": "hbase_regionserver_dns_nameserver", - "value": null - }, - { - "desc": "The health test thresholds of the total size of RegionServer's memstores. Specified as a percentage of the configured upper limit. See Maximum Size of All Memstores in RegionServer.", - "display_name": "HBase RegionServer Memstore Size Thresholds", - "name": "regionserver_memstore_size_thresholds", - "value": "{\"critical\":\"100.0\",\"warning\":\"95.0\"}" - }, - { - "desc": "Percentage of maximum heap (-Xmx setting) to allocate to block cache used by HFile/StoreFile. To disable, set this value to 0 .", - "display_name": "HFile Block Cache Size", - "name": "hfile_block_cache_size", - "value": "0.4" - }, - { - "desc": "The maximum size, in megabytes, per log file for RegionServer logs. Typically used by log4j.", - "display_name": "RegionServer Max Log Size", - "name": "max_log_size", - "value": "200" - }, - { - "desc": "

The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part of the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.

Each trigger has all of the following fields:

For example, here is a JSON formatted trigger configured for a DataNode that fires if the DataNode has more than 1500 file-descriptors opened:

[{\"triggerName\": \"sample-trigger\",\n  \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n  \"streamThreshold\": 0, \"enabled\": \"true\"}]

Consult the trigger rules documentation for more details on how to write triggers using tsquery.

The JSON format is evolving and may change in the future; as a result, backward compatibility is not guaranteed between releases at this time.

", - "display_name": "Role Triggers", - "name": "role_triggers", - "value": "[]" - }, - { - "desc": "Memstore will be flushed to disk if size of the memstore exceeds this value in number of bytes. This value is checked by a thread that runs the frequency specified by hbase.server.thread.wakefrequency.", - "display_name": "HBase Memstore Flush Size", - "name": "hbase_hregion_memstore_flush_size", - "value": "134217728" - }, - { - "desc": "The HLog file reader implementation.", - "display_name": "HLog Reader Implementation", - "name": "hbase_regionserver_hlog_reader_impl", - "value": null - }, - { - "desc": "If this number of HStoreFiles in any one HStore is exceeded, then a compaction is run to rewrite all HStoreFiles files as one HStoreFile. (One HStoreFile is written per flush of memstore.) You can delay compaction by specifying a larger number, but the compaction will take longer when it does run. During a compaction, updates cannot be flushed to disk. Long compactions require memory sufficient to carry the logging of all updates across the duration of the compaction. If too large, clients timeout during compaction.", - "display_name": "HStore Compaction Threshold", - "name": "hbase_hstore_compactionThreshold", - "value": "3" - }, - { - "desc": "The period over which to compute the moving average of the HDFS sync latency of the HBase RegionServer.", - "display_name": "HBase RegionServer HDFS Sync Latency Monitoring Period", - "name": "regionserver_sync_latency_window", - "value": "5" - }, - { - "desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into hadoop-metrics2.properties.", - "display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)", - "name": "hadoop_metrics2_safety_valve", - "value": null - }, - { - "desc": "The amount of stacks data that will be retained. After the retention limit is hit, the oldest data will be deleted.", - "display_name": "Stacks Collection Data Retention", - "name": "stacks_collection_data_retention", - "value": "104857600" - }, - { - "desc": "The maximum size byte array that should come from the MSLAB, in bytes.", - "display_name": "Maximum Byte Array from MSLAB Allocation Scheme", - "name": "hbase_hregion_memstore_mslab_max_allocation", - "value": "262144" - }, - { - "desc": "The health test thresholds for the latency of HDFS write operations that the RegionServer detects", - "display_name": "HBase RegionServer HDFS Sync Latency Thresholds", - "name": "regionserver_sync_latency_thresholds", - "value": "{\"critical\":\"5000.0\",\"warning\":\"500.0\"}" - }, - { - "desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.", - "display_name": "Unexpected Exits Thresholds", - "name": "unexpected_exits_thresholds", - "value": "{\"critical\":\"any\",\"warning\":\"never\"}" - }, - { - "desc": "Directory where RegionServer will place its log files.", - "display_name": "RegionServer Log Directory", - "name": "hbase_regionserver_log_dir", - "value": "/var/log/hbase" - }, - { - "desc": "The period of time that an HRegion will block updates after reaching the HStoreFile limit that is specified by 'hbase.hstore.blockingStoreFiles'. 
After this time has elapsed, the HRegion will stop blocking updates even if a compaction has not been completed.", - "display_name": "HStore Blocking Wait Time", - "name": "hbase_hstore_blockingWaitTime", - "value": "90000" - }, - { - "desc": "Timeout for Canary to perform its checks.", - "display_name": "Canary Timeout", - "name": "hbase_regionserver_canary_timeout", - "value": "15" - }, - { - "desc": "Duration between consecutive checks done by the Canary.", - "display_name": "Canary Interval", - "name": "hbase_regionserver_canary_interval", - "value": "6" - }, - { - "desc": "The lease period, in milliseconds, for the HBase RegionServer. Clients must report in within this period or else they are considered dead.", - "display_name": "HBase RegionServer Lease Period", - "name": "hbase_regionserver_lease_period", - "value": "60000" - }, - { - "desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.", - "display_name": "Cgroup CPU Shares", - "name": "rm_cpu_shares", - "value": "1024" - }, - { - "desc": "The minimum log level for RegionServer logs", - "display_name": "RegionServer Logging Threshold", - "name": "log_threshold", - "value": "INFO" - }, - { - "desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.", - "display_name": "Web Metric Collection", - "name": "regionserver_web_metric_collection_enabled", - "value": "true" - }, - { - "desc": "The name of the DNS Network Interface from which a RegionServer should report its IP address.", - "display_name": "RegionServer DNS Network Interface", - "name": "hbase_regionserver_dns_interface", - "value": null - }, - { - "desc": "The number of reservation blocks that are used to prevent unstable RegionServers caused by an OOME.", - "display_name": "RegionServer Reservation Blocks", - "name": "hbase_regionserver_nbreservationblocks", - "value": "4" - }, - { - "desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.", - "display_name": "File Descriptor Monitoring Thresholds", - "name": "regionserver_fd_thresholds", - "value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}" - }, - { - "desc": "The size of the chunks allocated by MSLAB, in bytes.", - "display_name": "Chunk Size Allocated by MSLAB Allocation Scheme", - "name": "hbase_hregion_memstore_mslab_chunksize", - "value": "2097152" - }, - { - "desc": "If the memstores in a region are this size or larger when closing, run a pre-flush process to clear out memstores before putting up the region closed flag and taking the region offline. On close, a flush process is run under the close flag up to empty memory. During this time, the region is offline and no writes are taken. If the memstore content is large, the flush process could take a long time to complete. 
The pre-flush process cleans out the bulk of the memstore before putting up the close flag and taking the region offline, so that the flush process that runs under the close flag has little to do.", - "display_name": "HBase Memstore Pre-close Flush Size", - "name": "hbase_hregion_preclose_flush_size", - "value": "5242880" - }, - { - "desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.", - "display_name": "Automatically Restart Process", - "name": "process_auto_restart", - "value": "false" - }, - { - "desc": "The period over which to compute the moving average of the HDFS read latency of the HBase RegionServer.", - "display_name": "HBase RegionServer HDFS Read Latency Monitoring Period", - "name": "regionserver_read_latency_window", - "value": "5" - }, - { - "desc": "An interface that is assignable to HRegionInterface. Used in HBase Client for opening a proxy to a remote HBase RegionServer.", - "display_name": "HBase RegionServer Interface Class", - "name": "hbase_regionserver_class", - "value": null - }, - { - "desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.", - "display_name": "Cgroup Memory Hard Limit", - "name": "rm_memory_hard_limit", - "value": "-1" - }, - { - "desc": "When true, HBase RegionServer will bind to 0.0.0.0. Only available in CDH 4.3 and later.", - "display_name": "HBase RegionServer Bind to Wildcard Address", - "name": "hbase_regionserver_bind_to_wildcard_address", - "value": "true" - }, - { - "desc": "The time, in milliseconds, between 'major' compactions of all HStoreFiles in a region. To disable automated major compactions, set this value to 0.", - "display_name": "HBase HRegion Major Compaction", - "name": "hbase_hregion_majorcompaction", - "value": "604800000" - }, - { - "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.", - "display_name": "Log Directory Free Space Monitoring Absolute Thresholds", - "name": "log_directory_free_space_absolute_thresholds", - "value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}" - }, - { - "desc": "Enable MemStore-Local Allocation Buffer (MSLAB) Allocation Scheme. Note: This feature is experimental in CDH3.", - "display_name": "Enable MSLAB Allocation Scheme", - "name": "hbase_hregion_memstore_mslab_enabled", - "value": "true" - }, - { - "desc": "Enables the health test that the RegionServer is connected to the Master", - "display_name": "HBase RegionServer to Master Connectivity Test", - "name": "regionserver_master_connectivity_enabled", - "value": "true" - }, - { - "desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.", - "display_name": "Maximum Process File Descriptors", - "name": "rlimit_fds", - "value": "32768" - }, - { - "desc": "Limit for the number of regions after which no more region splitting should take place. 
This is not a hard limit for the number of regions but acts as a guideline for the RegionServer to stop splitting after a certain limit.", - "display_name": "HBase Region Split Limit", - "name": "hbase_regionserver_regionSplitLimit", - "value": "2147483647" - }, - { - "desc": "Enables the health test that the RegionServer's process state is consistent with the role configuration", - "display_name": "RegionServer Process Health Test", - "name": "regionserver_scm_health_enabled", - "value": "true" - }, - { - "desc": "Interval, in milliseconds, between messages from the RegionServer to the HBase Master. Use a high value such as 3000 for clusters that have more than 10 hosts.", - "display_name": "HBase RegionServer Message Interval", - "name": "hbase_regionserver_msginterval", - "value": "3000" - }, - { - "desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it doesn't exist. However, if this directory already exists, role user must have write access to this directory. If this directory is shared amongst multiple roles, it should have 1777 permissions. Note that the heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.", - "display_name": "Heap Dump Directory", - "name": "oom_heap_dump_dir", - "value": "/tmp" - }, - { - "desc": "The period over which to compute the moving average of the compaction queue size.", - "display_name": "HBase RegionServer Compaction Queue Monitoring Period", - "name": "regionserver_compaction_queue_window", - "value": "5" - }, - { - "desc": "The frequency with which stacks will be collected.", - "display_name": "Stacks Collection Frequency", - "name": "stacks_collection_frequency", - "value": "5.0" - }, - { - "desc": "The period to review when computing the moving average of garbage collection time.", - "display_name": "Garbage Collection Duration Monitoring Period", - "name": "regionserver_gc_duration_window", - "value": "5" - }, - { - "desc": "Time period in seconds to reset long-running metrics (e.g. compactions). This is an HBase specific configuration.", - "display_name": "Extended Period", - "name": "hbase_metrics_extended_period", - "value": "3600" - }, - { - "desc": "Comma-separated list of codecs that the RegionServer requires to start. Use this setting to make sure that all RegionServers joining a cluster are installed with a particular set of codecs.", - "display_name": "RegionServer Codecs", - "name": "hbase_regionserver_codecs", - "value": "" - }, - { - "desc": "Maximum size of all memstores in a RegionServer before new updates are blocked and flushes are forced.", - "display_name": "Maximum Size of All Memstores in RegionServer", - "name": "hbase_regionserver_global_memstore_upperLimit", - "value": "0.4" - }, - { - "desc": "Number of handlers for processing priority requests in a RegionServer.", - "display_name": "HBase RegionServer Meta-Handler Count", - "name": "hbase_regionserver_metahandler_count", - "value": "10" - }, - { - "desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. 
By default processes not managed by Cloudera Manager will have no limit.", - "display_name": "Cgroup Memory Soft Limit", - "name": "rm_memory_soft_limit", - "value": "-1" - }, - { - "desc": "Period, in milliseconds, at which to roll the commit log.", - "display_name": "HBase RegionServer Log Roll Period", - "name": "hbase_regionserver_logroll_period", - "value": "3600000" - }, - { - "desc": "The period over which to compute the moving average of the flush queue size.", - "display_name": "HBase RegionServer Flush Queue Monitoring Period", - "name": "regionserver_flush_queue_window", - "value": "5" - }, - { - "desc": "Maximum HStoreFile size. If any one of a column families' HStoreFiles has grown to exceed this value, the hosting HRegion is split in two.", - "display_name": "HBase Maximum File Size", - "name": "hbase_hregion_max_filesize", - "value": "10737418240" - }, - { - "desc": "The health test thresholds of the weighted average size of the HBase RegionServer compaction queue over a recent period. See HBase RegionServer Compaction Queue Monitoring Period.", - "display_name": "HBase RegionServer Compaction Queue Monitoring Thresholds", - "name": "regionserver_compaction_queue_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"10.0\"}" - }, - { - "desc": "If there are more than this number of HStoreFiles in any one HStore, then updates are blocked for this HRegion until a compaction is completed, or until the value specified for 'hbase.hstore.blockingWaitTime' has been exceeded.", - "display_name": "HStore Blocking Store Files", - "name": "hbase_hstore_blockingStoreFiles", - "value": "10" - }, - { - "desc": "The health test thresholds on the duration of the metrics request to the web server.", - "display_name": "Web Metric Collection Duration", - "name": "regionserver_web_metric_collection_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}" - }, - { - "desc": "The maximum number of rolled log files to keep for RegionServer logs. Typically used by log4j.", - "display_name": "RegionServer Maximum Log File Backups", - "name": "max_log_backup_index", - "value": "10" - }, - { - "desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.", - "display_name": "Kill When Out of Memory", - "name": "oom_sigkill_enabled", - "value": "true" - }, - { - "desc": "The HLog file writer implementation.", - "display_name": "HLog Writer Implementation", - "name": "hbase_regionserver_hlog_writer_impl", - "value": null - }, - { - "desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.", - "display_name": "Enable Configuration Change Alerts", - "name": "enable_config_alerts", - "value": "false" - }, - { - "desc": "The port that an HBase RegionServer binds to.", - "display_name": "HBase RegionServer Port", - "name": "hbase_regionserver_port", - "value": "60020" - }, - { - "desc": "Number of RPC Server instances spun up on RegionServers.", - "display_name": "HBase RegionServer Handler Count", - "name": "hbase_regionserver_handler_count", - "value": "30" - }, - { - "desc": "The directory in which stacks logs will be placed. If not set, stacks will be logged into a stackssubdirectory of the role's log directory.", - "display_name": "Stacks Collection Directory", - "name": "stacks_collection_directory", - "value": null - }, - { - "desc": "Maximum size in bytes for the Java Process heap memory. 
Passed to Java -Xmx.", - "display_name": "Java Heap Size of HBase RegionServer in Bytes", - "name": "hbase_regionserver_java_heapsize", - "value": "4294967296" - }, - { - "desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.", - "display_name": "Cgroup I/O Weight", - "name": "rm_io_weight", - "value": "500" - }, - { - "desc": "When computing the overall RegionServer health, consider the host's health.", - "display_name": "RegionServer Host Health Test", - "name": "regionserver_host_health_enabled", - "value": "true" - } -] \ No newline at end of file diff --git a/sahara/plugins/cdh/v5/resources/hbase-service.json b/sahara/plugins/cdh/v5/resources/hbase-service.json deleted file mode 100644 index 168d6546..00000000 --- a/sahara/plugins/cdh/v5/resources/hbase-service.json +++ /dev/null @@ -1,476 +0,0 @@ -[ - { - "desc": "Comma-delimited list of hosts where you want to allow the HBase user to impersonate other users. The default '*' allows all hosts. To disable entirely, use a string that does not correspond to a host name, such as '_no_host'. Note: This property is used only if HBase REST/Thrift Server Authentication is enabled.", - "display_name": "HBase Proxy User Hosts", - "name": "hbase_proxy_user_hosts_list", - "value": "*" - }, - { - "desc": "The user that this service's processes should run as.", - "display_name": "System User", - "name": "process_username", - "value": "hbase" - }, - { - "desc": "The frequency in which the log4j event publication appender will retry sending undelivered log events to the Event server, in seconds", - "display_name": "Log Event Retry Frequency", - "name": "log_event_retry_frequency", - "value": "30" - }, - { - "desc": "A general client pause time value. Used mostly as a time period to wait before retrying operations such as a failed get or region lookup.", - "display_name": "HBase Client Pause", - "name": "hbase_client_pause", - "value": "100" - }, - { - "desc": "Comma-delimited list of groups that you want to allow the HBase user to impersonate. The default '*' allows all groups. To disable entirely, use a string that does not correspond to a group name, such as '_no_group_'. Note: This property is used only if HBase REST/Thrift Server Authentication is enabled.", - "display_name": "HBase Proxy User Groups", - "name": "hbase_proxy_user_groups_list", - "value": "*" - }, - { - "desc": "Number of rows to fetch when calling next on a scanner if it is not served from memory. Higher caching values enable faster scanners but require more memory and some calls of next may take longer when the cache is empty.", - "display_name": "HBase Client Scanner Caching", - "name": "hbase_client_scanner_caching", - "value": "100" - }, - { - "desc": "

\nConfigures the rules for event tracking and coalescing. This feature is\nused to define equivalency between different audit events. When\nevents match, according to a set of configurable parameters, only one\nentry in the audit list is generated for all the matching events.\n

\n\n

\nTracking works by keeping a reference to events when they first appear,\nand comparing other incoming events against the \"tracked\" events according\nto the rules defined here.\n

\n\n

Event trackers are defined in a JSON object like the following:

\n\n
\n{\n  \"timeToLive\" : [integer],\n  \"fields\" : [\n    {\n      \"type\" : [string],\n      \"name\" : [string]\n    }\n  ]\n}\n
\n\n

\nWhere:\n

\n\n\n\n

\nEach field has an evaluator type associated with it. The evaluator defines\nhow the field data is to be compared. The following evaluators are\navailable:\n

\n\n\n\n

\nThe following is the list of fields that can be used to compare HBase events:\n

\n\n", - "display_name": "Event Tracker", - "name": "navigator_event_tracker", - "value": "{\n \"comment\" : [\n \"Default event tracker for HBase services.\",\n \"Defines equality by comparing username, action, table name, family \",\n \"and qualifier of the events.\"\n ],\n \"timeToLive\" : 60000,\n \"fields\" : [\n { \"type\": \"value\", \"name\" : \"tableName\" },\n { \"type\": \"value\", \"name\" : \"family\" },\n { \"type\": \"value\", \"name\" : \"qualifier\" },\n { \"type\": \"value\", \"name\" : \"operation\" },\n { \"type\": \"username\", \"name\" : \"username\" }\n ]\n}\n" - }, - { - "desc": "Action to take when the audit event queue is full. Drop the event or shutdown the affected process.", - "display_name": "Queue Policy", - "name": "navigator_audit_queue_policy", - "value": "DROP" - }, - { - "desc": "Maximum number of errors that the HBase Hbck poller will retain through a given run", - "display_name": "HBase Hbck Poller Maximum Error Count", - "name": "hbase_hbck_poller_max_error_count", - "value": "10000" - }, - { - "desc": "Timeout for all HBase RPCs in milliseconds.", - "display_name": "RPC Timeout", - "name": "hbase_rpc_timeout", - "value": "60000" - }, - { - "desc": "Timeout (in ms) for the distributed log splitting manager to receive response from a worker.", - "display_name": "SplitLog Manager Timeout", - "name": "hbase_service_splitlog_manager_timeout", - "value": "120000" - }, - { - "desc": "

Event filters are defined in a JSON object like the following:

\n\n
\n{\n  \"defaultAction\" : (\"accept\", \"discard\"),\n  \"rules\" : [\n    {\n      \"action\" : (\"accept\", \"discard\"),\n      \"fields\" : [\n        {\n          \"name\" : \"fieldName\",\n          \"match\" : \"regex\"\n        }\n      ]\n    }\n  ]\n}\n
\n\n

\nA filter has a default action and a list of rules, in order of precedence.\nEach rule defines an action, and a list of fields to match against the\naudit event.\n

\n\n

\nA rule is \"accepted\" if all the listed field entries match the audit\nevent. At that point, the action declared by the rule is taken.\n

\n\n

\nIf no rules match the event, the default action is taken. Actions\ndefault to \"accept\" if not defined in the JSON object.\n

\n\n

\nThe following is the list of fields that can be filtered for HBase events:\n

\n\n", - "display_name": "Event Filter", - "name": "navigator_audit_event_filter", - "value": "{\n \"comment\" : [\n \"Default filter for HBase services.\",\n \"Discards events that affect the internal -ROOT-, .META. and _acl_ tables.\"\n ],\n \"defaultAction\" : \"accept\",\n \"rules\" : [\n {\n \"action\" : \"discard\",\n \"fields\" : [\n { \"name\" : \"tableName\", \"match\" : \"(?:-ROOT-|.META.|_acl_|hbase:meta|hbase:acl)\" }\n ]\n }\n ]\n}\n" - }, - { - "desc": "The tolerance window that will be used in HBase service tests that depend on detection of the active HBase Master.", - "display_name": "HBase Active Master Detection Window", - "name": "hbase_active_master_detecton_window", - "value": "3" - }, - { - "desc": "Allow indexing of tables in HBase by Lily HBase Indexer. Note: Replication must be enabled for indexing to work.", - "display_name": "Enable Indexing", - "name": "hbase_enable_indexing", - "value": "false" - }, - { - "desc": "Name of the HDFS service that this HBase service instance depends on", - "display_name": "HDFS Service", - "name": "hdfs_service", - "value": null - }, - { - "desc": "Period of time, in milliseconds, to pause between connection retries to ZooKeeper. Used together with ${zookeeper.retries} in an exponential backoff fashion when making queries to ZooKeeper.", - "display_name": "ZooKeeper Connection Retry Pause Duration", - "name": "zookeeper_pause", - "value": null - }, - { - "desc": "For advanced use only, a string to be inserted into hbase-site.xml. Applies to configurations of all roles in this service except client configuration.", - "display_name": "HBase Service Advanced Configuration Snippet (Safety Valve) for hbase-site.xml", - "name": "hbase_service_config_safety_valve", - "value": null - }, - { - "desc": "Enable SSL encryption for HBase web UIs", - "display_name": "Web UI SSL Encryption Enabled", - "name": "hbase_hadoop_ssl_enabled", - "value": "false" - }, - { - "desc": "The root znode for HBase in ZooKeeper. All of HBase's ZooKeeper files that are configured with a relative path will go under this node. By default, all of HBase's ZooKeeper file paths are configured with a relative path, so they will all go under this directory unless changed.", - "display_name": "ZooKeeper Znode Parent", - "name": "zookeeper_znode_parent", - "value": "/hbase" - }, - { - "desc": "AWS access key Id required to access S3 to store remote snapshots.", - "display_name": "AWS S3 access key Id for remote snapshots", - "name": "hbase_snapshot_s3_access_key_id", - "value": null - }, - { - "desc": "When computing the overall HBase cluster health, consider the active HBase Master's health.", - "display_name": "Active Master Health Test", - "name": "hbase_master_health_enabled", - "value": "true" - }, - { - "desc": "Maximum number of rolled over audit logs to retain. 
The logs will not be deleted if they contain audit events that have not yet been propagated to Audit Server.", - "display_name": "Number of Audit Logs to Retain", - "name": "navigator_audit_log_max_backup_index", - "value": "10" - }, - { - "desc": "Choose the authentication mechanism used by HBase", - "display_name": "HBase Secure Authentication", - "name": "hbase_security_authentication", - "value": "simple" - }, - { - "desc": "AWS secret access key required to access S3 to store remote snapshots.", - "display_name": "AWS S3 secret access key for remote snapshots", - "name": "hbase_snapshot_s3_secret_access_key", - "value": null - }, - { - "desc": "An alert is published if the HBase Hbck tool runs slowly.", - "display_name": "HBase Hbck Slow Run Alert Enabled", - "name": "hbase_hbck_slow_run_alert_enabled", - "value": "true" - }, - { - "desc": "Ratio of Lily HBase Indexers used by each HBase RegionServer while doing replication.", - "display_name": "Replication Source Ratio", - "name": "hbase_replication_source_ratio", - "value": "1.0" - }, - { - "desc": "Name of the ZooKeeper service that this HBase service instance depends on", - "display_name": "ZooKeeper Service", - "name": "zookeeper_service", - "value": null - }, - { - "desc": "Maximum size of audit log file in MB before it is rolled over.", - "display_name": "Maximum Audit Log File Size", - "name": "navigator_audit_log_max_file_size", - "value": "100" - }, - { - "desc": "For advanced use only, a string to be inserted into core-site.xml. Applies to configurations of all roles in this service except client configuration.", - "display_name": "HBase Service Advanced Configuration Snippet (Safety Valve) for core-site.xml", - "name": "hbase_core_site_safety_valve", - "value": null - }, - { - "desc": "Password for the server keystore file used for encrypted web UIs", - "display_name": "SSL Server Keystore File Password", - "name": "ssl_server_keystore_password", - "value": null - }, - { - "desc": "Size of the threadpool used for hedged reads in hdfs clients. If a read from a block is slow, a parallel 'hedged' read will be started against a different block replica. The first one to return with a result is used while the other one is cancelled. This 'hedged' read feature helps rein in the outliers. A value of zero disables the feature.", - "display_name": "HDFS Hedged Read Threadpool Size", - "name": "hbase_server_dfs_client_hedged_read_threadpool_size", - "value": "0" - }, - { - "desc": "Configures whether the Hbck poller checks HDFS or not. Checking HBase tables and regions information on HDFS can take a while.", - "display_name": "HBase Hbck Check HDFS", - "name": "hbase_hbck_poller_check_hdfs", - "value": "true" - }, - { - "desc": "If this is set to \"kerberos\", HBase REST Server will authenticate its clients. HBase Proxy User Hosts and Groups should be configured to allow specific users to access HBase through REST Server.", - "display_name": "HBase REST Authentication", - "name": "hbase_restserver_security_authentication", - "value": "simple" - }, - { - "desc": "Tables to exclude in the HBase Region Health Canary which will scan a row from every region.", - "display_name": "HBase Region Health Canary Exclude Tables", - "name": "hbase_region_health_canary_exclude_tables", - "value": "" - }, - { - "desc": "Specifies the combined maximum allowed size of a KeyValue instance. This option configures an upper boundary for a single entry saved in a storage file. This option prevents a region from splitting if the data is too large. 
Set this option to a fraction of the maximum region size. To disable this check, use a value of zero or less.", - "display_name": "Maximum Size of HBase Client KeyValue", - "name": "hbase_client_keyvalue_maxsize", - "value": "10485760" - }, - { - "desc": "Path to the directory where audit logs will be written. The directory will be created if it doesn't exist.", - "display_name": "Audit Log Directory", - "name": "audit_event_log_dir", - "value": "/var/log/hbase/audit" - }, - { - "desc": "Enable snapshots. Disabling snapshots requires deletion of all snapshots before restarting the HBase master; the HBase master will not start if snapshots are disabled and snapshots exist.", - "display_name": "Enable Snapshots", - "name": "hbase_snapshot_enabled", - "value": "true" - }, - { - "desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of all roles in this service except client configuration.", - "display_name": "HBase Service Environment Advanced Configuration Snippet (Safety Valve)", - "name": "hbase_service_env_safety_valve", - "value": null - }, - { - "desc": "Enables the canary that checks HBase region availability by scanning a row from every region.", - "display_name": "HBase Region Health Canary", - "name": "hbase_region_health_canary_enabled", - "value": "true" - }, - { - "desc": "Timeout for graceful shutdown of this HBase service. Once this timeout is reached, any remaining running roles are abruptly shutdown. A value of 0 means no timeout.", - "display_name": "Graceful Shutdown Timeout", - "name": "hbase_graceful_stop_timeout", - "value": "180" - }, - { - "desc": "An alert is published if the HBase Hbck tool detects at least this many regions with errors across all tables in this service. If the value is not set, alerts will not be published based on the count of regions with errors.", - "display_name": "HBase Hbck Region Error Count Alert Threshold", - "name": "hbase_hbck_alert_region_error_count_threshold", - "value": null - }, - { - "desc": "An alert is published if the HBase Hbck tool finds any errors with matching codes. Possible error codes: UNKNOWN, NO_META_REGION, NULL_ROOT_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META, NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED, MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE, FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS, HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION, ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE", - "display_name": "HBase Hbck Alert Error Codes", - "name": "hbase_hbck_alert_error_codes", - "value": "NO_META_REGION,NULL_ROOT_REGION" - }, - { - "desc": "AWS S3 path where remote snapshots should be stored.", - "display_name": "AWS S3 path for remote snapshots", - "name": "hbase_snapshot_s3_path", - "value": null - }, - { - "desc": "Maximum number of client retries. Used as a maximum for all operations such as fetching of the root region from the root RegionServer, getting a cell's value, and starting a row update.", - "display_name": "Maximum HBase Client Retries", - "name": "hbase_client_retries_number", - "value": "35" - }, - { - "desc": "An alert is published if the HBase Hbck tool detects at least this many errors across all tables in this service. Some errors are not associated with a region, e.g. 'RS_CONNECT_FAILURE'. 
If the value is not set, alerts will not be published based on the count of errors.", - "display_name": "HBase Hbck Error Count Alert Threshold", - "name": "hbase_hbck_alert_error_count_threshold", - "value": null - }, - { - "desc": "Enable collection of audit events from the service's roles.", - "display_name": "Enable Collection", - "name": "navigator_audit_enabled", - "value": "true" - }, - { - "desc": "The group that this service's processes should run as.", - "display_name": "System Group", - "name": "process_groupname", - "value": "hbase" - }, - { - "desc": "Set to true to use HBase Secure RPC Engine for remote procedure calls (RPC). This is only effective in simple authentication mode. Does not provide authentication for RPC calls, but provides user information in the audit logs. Changing this setting requires a restart of this and all dependent services and redeployment of client configurations, along with a restart of the Service Monitor management role.", - "display_name": "HBase Secure RPC Engine", - "name": "hbase_secure_rpc_engine", - "value": "false" - }, - { - "desc": "When set, Cloudera Manager will send alerts when the health of this service reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold", - "display_name": "Enable Service Level Health Alerts", - "name": "enable_alerts", - "value": "true" - }, - { - "desc": "The HDFS directory shared by HBase RegionServers", - "display_name": "HDFS Root Directory", - "name": "hdfs_rootdir", - "value": "/hbase" - }, - { - "desc": "Enable HDFS short circuit read. This allows a client co-located with the DataNode to read HDFS file blocks directly. This gives a performance boost to distributed clients that are aware of locality.", - "display_name": "Enable HDFS Short Circuit Read", - "name": "dfs_client_read_shortcircuit", - "value": "true" - }, - { - "desc": "For advanced use only, a string to be inserted into ssl-server.xml. Applies to configurations of all roles in this service except client configuration.", - "display_name": "HBase Service Advanced Configuration Snippet (Safety Valve) for ssl-server.xml", - "name": "hbase_ssl_server_safety_valve", - "value": null - }, - { - "desc": "Password that protects the private key contained in the server keystore used for encrypted web UIs", - "display_name": "SSL Server Keystore Key Password", - "name": "ssl_server_keystore_keypassword", - "value": null - }, - { - "desc": "Path to ZooKeeper Node holding root region location. This is written by the HBase Master and read by clients and RegionServers. If a relative path is given, the parent folder will be ${zookeeper.znode.parent}. 
By default, the root location is stored at /hbase/root-region-server.", - "display_name": "ZooKeeper Znode Rootserver", - "name": "zookeeper_znode_rootserver", - "value": "root-region-server" - }, - { - "desc": "When computing the overall HBase cluster health, consider the health of the backup HBase Masters.", - "display_name": "Backup Masters Health Test", - "name": "hbase_backup_masters_health_enabled", - "value": "true" - }, - { - "desc": "For advanced use only, a list of configuration properties that will be used by the Service Monitor instead of the current client configuration for the service.", - "display_name": "Service Monitor Client Config Overrides", - "name": "smon_client_config_overrides", - "value": "zookeeper.recovery.retry0zookeeper.recovery.retry.intervalmill3000hbase.zookeeper.recoverable.waittime1000zookeeper.session.timeout30000hbase.rpc.timeout10000hbase.client.retries.number1hbase.client.rpc.maxattempts1hbase.client.operation.timeout10000" - }, - { - "desc": "

The configured triggers for this service. This is a JSON formatted list of triggers. These triggers are evaluated as part of the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.

Each trigger has all of the following fields:

For example, here is a JSON formatted trigger that fires if there are more than 10 DataNodes with more than 500 file-descriptors opened:

[{\"triggerName\": \"sample-trigger\",\n  \"triggerExpression\": \"IF (SELECT fd_open WHERE roleType = DataNode and last(fd_open) > 500) DO health:bad\",\n  \"streamThreshold\": 10, \"enabled\": \"true\"}]

Consult the trigger rules documentation for more details on how to write triggers using tsquery.

The JSON format is evolving and may change in the future; as a result, backward compatibility is not guaranteed between releases at this time.

", - "display_name": "Service Triggers", - "name": "service_triggers", - "value": "[]" - }, - { - "desc": "An alert is published if the HBase region health canary runs slowly.", - "display_name": "HBase Region Health Canary Slow Run Alert Enabled", - "name": "hbase_region_health_canary_slow_run_alert_enabled", - "value": "true" - }, - { - "desc": "Set to true to cause the hosting server (Master or RegionServer) to abort if a coprocessor throws a Throwable object that is not IOException or a subclass of IOException. Setting it to true might be useful in development environments where one wants to terminate the server as soon as possible to simplify coprocessor failure analysis.", - "display_name": "HBase Coprocessor Abort on Error", - "name": "hbase_coprocessor_abort_on_error", - "value": "false" - }, - { - "desc": "When set, each role identifies important log events and forwards them to Cloudera Manager.", - "display_name": "Enable Log Event Capture", - "name": "catch_events", - "value": "true" - }, - { - "desc": "Allow HBase tables to be replicated.", - "display_name": "Enable Replication", - "name": "hbase_enable_replication", - "value": "false" - }, - { - "desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.", - "display_name": "Enable Configuration Change Alerts", - "name": "enable_config_alerts", - "value": "false" - }, - { - "desc": "Path to the keystore file containing the server certificate and private key used for encrypted web UIs", - "display_name": "SSL Server Keystore File Location", - "name": "ssl_server_keystore_location", - "value": null - }, - { - "desc": "Enable HBase authorization", - "display_name": "HBase Secure Authorization", - "name": "hbase_security_authorization", - "value": "false" - }, - { - "desc": "Name of the scheduler pool to use for MR jobs created during export/import of remote snapshots in AWS S3.", - "display_name": "Scheduler pool for remote snapshots in AWS S3.", - "name": "hbase_snapshot_s3_scheduler_pool", - "value": null - }, - { - "desc": "Period of time, in milliseconds, to pause between searches for work. Used as a sleep interval by service threads such as a META scanner and log roller.", - "display_name": "HBase Server Thread Wake Frequency", - "name": "hbase_server_thread_wakefrequency", - "value": "10000" - }, - { - "desc": "An alert is published if the HBase region health canary detects at least this percentage of total regions are unhealthy. This threshold is used if the explicit count is not set via the hbase_canary_alert_unhealthy_region_count_threshold config.", - "display_name": "HBase Canary Unhealthy Region Percentage Alert Threshold", - "name": "hbase_canary_alert_unhealthy_region_percent_threshold", - "value": "0.1" - }, - { - "desc": "The health test thresholds of the overall RegionServer health. The check returns \"Concerning\" health if the percentage of \"Healthy\" RegionServers falls below the warning threshold. 
The check is unhealthy if the total percentage of \"Healthy\" and \"Concerning\" RegionServers falls below the critical threshold.", - "display_name": "Healthy RegionServer Monitoring Thresholds", - "name": "hbase_regionservers_healthy_thresholds", - "value": "{\"critical\":\"90.0\",\"warning\":\"95.0\"}" - }, - { - "desc": "For advanced use only, a string to be inserted into the client configuration for navigator.client.properties.", - "display_name": "HBASE Client Advanced Configuration Snippet (Safety Valve) for navigator.client.properties", - "name": "navigator_client_config_safety_valve", - "value": null - }, - { - "desc": "Duration to wait before starting up a 'hedged' read.", - "display_name": "HDFS Hedged Read Delay Threshold", - "name": "hbase_server_dfs_client_hedged_read_threshold_millis", - "value": "500" - }, - { - "desc": "Enables the HBase Hbck Poller so that Hbck reports will be available. Enabling the Hbck poller will increase the amount of memory used by the Service Monitor. Consider increasing the Service Monitor Java heap size by an additional 3KB per region. For example, for a cluster with 10,000 regions, increase the JVM heap size by approximately 30MB.", - "display_name": "HBase Hbck Poller", - "name": "hbase_hbck_poller_enabled", - "value": "false" - }, - { - "desc": "List of users or groups, who are allowed full privileges, regardless of stored ACLs, across the cluster. Only used when HBase security is enabled.", - "display_name": "HBase Superusers", - "name": "hbase_superuser", - "value": "" - }, - { - "desc": "Maximum number of hlog entries to replicate in one go. If this is large, and a consumer takes a while to process the events, the HBase RPC call will time out.", - "display_name": "Replication Batch Size", - "name": "hbase_replication_source_nb_capacity", - "value": "1000" - }, - { - "desc": "An alert is published if the HBase region health canary detects at least this many unhealthy regions. This setting takes precedence over the hbase_canary_alert_unhealthy_region_percent_threshold config.", - "display_name": "HBase Canary Unhealthy Region Count Alert Threshold", - "name": "hbase_canary_alert_unhealthy_region_count_threshold", - "value": null - }, - { - "desc": "The number of times to retry connections to ZooKeeper. Used for reading and writing root region location. Used together with ${zookeeper.pause} in an exponential backoff fashion when making queries to ZooKeeper.", - "display_name": "ZooKeeper Connection Retries", - "name": "zookeeper_retries", - "value": null - }, - { - "desc": "ZooKeeper session timeout in milliseconds. HBase passes this to the ZooKeeper quorum as the suggested maximum time for a session. See http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions The client sends a requested timeout, the server responds with the timeout that it can give the client.", - "display_name": "ZooKeeper Session Timeout", - "name": "zookeeper_session_timeout", - "value": "60000" - }, - { - "desc": "For advanced use only, a list of derived configuration properties that will be used by the Service Monitor instead of the default ones.", - "display_name": "Service Monitor Derived Configs Advanced Configuration Snippet (Safety Valve)", - "name": "smon_derived_configs_safety_valve", - "value": null - }, - { - "desc": "Write buffer size in bytes. 
A larger buffer requires more memory on both the client and the server because the server instantiates the passed write buffer to process it but reduces the number of remote procedure calls (RPC). To estimate the amount of server memory used, multiply the value of 'hbase.client.write.buffer' by the value of 'hbase.regionserver.handler.count'.", - "display_name": "HBase Client Write Buffer", - "name": "hbase_client_write_buffer", - "value": "2097152" - }, - { - "desc": "The user the management services will impersonate when connecting to HBase. Defaults to 'hbase', a superuser.", - "display_name": "HBase User to Impersonate", - "name": "hbase_user_to_impersonate", - "value": "hbase" - } -] \ No newline at end of file diff --git a/sahara/plugins/cdh/v5/resources/hdfs-balancer.json b/sahara/plugins/cdh/v5/resources/hdfs-balancer.json deleted file mode 100644 index f2f777d7..00000000 --- a/sahara/plugins/cdh/v5/resources/hdfs-balancer.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.", - "display_name": "Java Configuration Options for Balancer", - "name": "balancer_java_opts", - "value": "" - }, - { - "desc": "
This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.
Each rule has some or all of the following fields:
Example: {\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}
This rule will send events to Cloudera Manager for every StringIndexOutOfBoundsException, up to a maximum of 10 every minute.
", - "display_name": "Rules to Extract Events from Log Files", - "name": "log_event_whitelist", - "value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n" - }, - { - "desc": "For advanced use only, a string to be inserted into hdfs-site.xml for this role only.", - "display_name": "Balancer Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml", - "name": "balancer_config_safety_valve", - "value": null - }, - { - "desc": "The policy that should be used to rebalance HDFS storage. The default DataNode policy balances the storage at the DataNode level. This is similar to the balancing policy from prior releases. The BlockPool policy balances the storage at the block pool level as well as at the Datanode level. The BlockPool policy is relevant only to a Federated HDFS service.", - "display_name": "Rebalancing Policy", - "name": "rebalancing_policy", - "value": "DataNode" - }, - { - "desc": "The percentage deviation from average utilization, after which a node will be rebalanced. (for example, '10.0' for 10%)", - "display_name": "Rebalancing Threshold", - "name": "rebalancer_threshold", - "value": "10.0" - }, - { - "desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.", - "display_name": "Enable Configuration Change Alerts", - "name": "enable_config_alerts", - "value": "false" - }, - { - "desc": "Maximum size for the Java Process heap memory. Passed to Java -Xmx. Measured in bytes.", - "display_name": "Java Heap Size of Balancer in Bytes", - "name": "balancer_java_heapsize", - "value": "1073741824" - } -] \ No newline at end of file diff --git a/sahara/plugins/cdh/v5/resources/hdfs-datanode.json b/sahara/plugins/cdh/v5/resources/hdfs-datanode.json deleted file mode 100644 index 46c9fb2c..00000000 --- a/sahara/plugins/cdh/v5/resources/hdfs-datanode.json +++ /dev/null @@ -1,380 +0,0 @@ -[ - { - "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. 
This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.", - "display_name": "Log Directory Free Space Monitoring Percentage Thresholds", - "name": "log_directory_free_space_percentage_thresholds", - "value": "{\"critical\":\"never\",\"warning\":\"never\"}" - }, - { - "desc": "
This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.
Each rule has some or all of the following fields:
Example: {\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}
This rule will send events to Cloudera Manager for every StringIndexOutOfBoundsException, up to a maximum of 10 every minute.
", - "display_name": "Rules to Extract Events from Log Files", - "name": "log_event_whitelist", - "value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 5, \"content\":\"Datanode registration failed\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Got a command from standby NN - ignoring command:.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"}\n ]\n}\n" - }, - { - "desc": "Specifies the maximum number of threads to use for transferring data in and out of the DataNode.", - "display_name": "Maximum Number of Transfer Threads", - "name": "dfs_datanode_max_xcievers", - "value": "4096" - }, - { - "desc": "Comma-separated list of DataNode plug-ins to be activated. If one plug-in cannot be loaded, all the plug-ins are ignored.", - "display_name": "DateNode Plugins", - "name": "dfs_datanode_plugins_list", - "value": "" - }, - { - "desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.", - "display_name": "Cgroup I/O Weight", - "name": "rm_io_weight", - "value": "500" - }, - { - "desc": "In some workloads, the data read from HDFS is known to be significantly large enough that it is unlikely to be useful to cache it in the operating system buffer cache. In this case, the DataNode may be configured to automatically purge all data from the buffer cache after it is delivered to the client. This may improve performance for some workloads by freeing buffer cache spare usage for more cacheable data. This behavior will always be disabled for workloads that read only short sections of a block (e.g HBase random-IO workloads). This property is supported in CDH3u3 or later deployments.", - "display_name": "Enable purging cache after reads", - "name": "dfs_datanode_drop_cache_behind_reads", - "value": "false" - }, - { - "desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. 
Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.", - "display_name": "Cgroup CPU Shares", - "name": "rm_cpu_shares", - "value": "1024" - }, - { - "desc": "Comma-delimited list of directories on the local file system where the DataNode stores HDFS block data. Typical values are /data/N/dfs/dn for N = 1, 2, 3... These directories should be mounted using the noatime option and the disks should be configured using JBOD. RAID is not recommended.", - "display_name": "DataNode Data Directory", - "name": "dfs_data_dir_list", - "value": null - }, - { - "desc": "The number of volumes that are allowed to fail before a DataNode stops offering service. By default, any volume failure will cause a DataNode to shutdown.", - "display_name": "DataNode Failed Volumes Tolerated", - "name": "dfs_datanode_failed_volumes_tolerated", - "value": "0" - }, - { - "desc": "In some workloads, the data written to HDFS is known to be significantly large enough that it is unlikely to be useful to cache it in the operating system buffer cache. In this case, the DataNode may be configured to automatically purge all data from the buffer cache after it is written to disk. This may improve performance for some workloads by freeing buffer cache spare usage for more cacheable data. This property is supported in CDH3u3 or later deployments.", - "display_name": "Enable purging cache after writes", - "name": "dfs_datanode_drop_cache_behind_writes", - "value": "false" - }, - { - "desc": "If enabled, the DataNode binds to the wildcard address (\"0.0.0.0\") on all of its ports.", - "display_name": "Bind DataNode to Wildcard Address", - "name": "dfs_datanode_bind_wildcard", - "value": "false" - }, - { - "desc": "The number of server threads for the DataNode.", - "display_name": "Handler Count", - "name": "dfs_datanode_handler_count", - "value": "3" - }, - { - "desc": "When computing the overall DataNode health, consider the host's health.", - "display_name": "DataNode Host Health Test", - "name": "datanode_host_health_enabled", - "value": "true" - }, - { - "desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.", - "display_name": "Automatically Restart Process", - "name": "process_auto_restart", - "value": "true" - }, - { - "desc": "The maximum number of rolled log files to keep for DataNode logs. Typically used by log4j.", - "display_name": "DataNode Maximum Log File Backups", - "name": "max_log_backup_index", - "value": "10" - }, - { - "desc": "
The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part of the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.
Each trigger has all of the following fields: