68999ce9cb
We add a directory v_5_4_0 to the cdh plugin, so that 5.4.0 is offered as a version choice when the cdh plugin is selected.

Implements: blueprint cdh-5-4-support
Change-Id: I05d2082b589eb031b0ce1b5a253dc62a75912e1b
548 lines
36 KiB
JSON
[
    {
        "desc": "Name of the journal located on each JournalNode filesystem.",
        "display_name": "Quorum-based Storage Journal name",
        "name": "dfs_namenode_quorum_journal_name",
        "value": null
    },
    {
        "desc": "Whether or not periodic stacks collection is enabled.",
        "display_name": "Stacks Collection Enabled",
        "name": "stacks_collection_enabled",
        "value": "false"
    },
    {
        "desc": "Indicate whether or not to avoid reading from stale DataNodes for which heartbeat messages have not been received by the NameNode for more than Stale DataNode Time Interval. Stale DataNodes are moved to the end of the node list returned for reading. See dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.",
        "display_name": "Avoid Reading Stale DataNode",
        "name": "dfs_namenode_avoid_read_stale_datanode",
        "value": "false"
    },
    {
        "desc": "The base port where the DFS NameNode web UI listens. If the port number is 0, then the server starts on a free port. Combined with the NameNode's hostname to build its HTTP address.",
        "display_name": "NameNode Web UI Port",
        "name": "dfs_http_port",
        "value": "50070"
    },
    {
        "desc": "Enables the health test that the NameNode is not in safemode",
        "display_name": "NameNode Safemode Health Test",
        "name": "namenode_safe_mode_enabled",
        "value": "true"
    },
    {
        "desc": "The method used to collect stacks. The jstack option involves periodically running the jstack command against the role's daemon process. The servlet method is available for those roles that have an HTTP server endpoint exposing the current stacks traces of all threads. When the servlet method is selected, that HTTP endpoint is periodically scraped.",
        "display_name": "Stacks Collection Method",
        "name": "stacks_collection_method",
        "value": "jstack"
    },
    {
        "desc": "The health test thresholds on the duration of the metrics request to the web server.",
        "display_name": "Web Metric Collection Duration",
        "name": "namenode_web_metric_collection_thresholds",
        "value": "{\"critical\":\"never\",\"warning\":\"10000.0\"}"
    },
    {
        "desc": "The health check thresholds of the NameNode's RPC latency.",
        "display_name": "NameNode RPC Latency Thresholds",
        "name": "namenode_rpc_latency_thresholds",
        "value": "{\"critical\":\"5000.0\",\"warning\":\"1000.0\"}"
    },
    {
        "desc": "The period to review when computing the moving average of extra time the pause monitor spent paused.",
        "display_name": "Pause Duration Monitoring Period",
        "name": "namenode_pause_duration_window",
        "value": "5"
    },
    {
        "desc": "The period to review when computing unexpected exits.",
        "display_name": "Unexpected Exits Monitoring Period",
        "name": "unexpected_exits_window",
        "value": "5"
    },
    {
        "desc": "The maximum number of outgoing replication threads a node can have at one time. This limit is waived for the highest priority replications. Configure dfs.namenode.replication.max-streams-hard-limit to set the absolute limit, including the highest-priority replications.",
        "display_name": "Maximum Number of Replication Threads on a DataNode",
        "name": "dfs_namenode_replication_max_streams",
        "value": "20"
    },
    {
        "desc": "For advanced use only, a string to be inserted into <strong>log4j.properties</strong> for this role only.",
        "display_name": "NameNode Logging Advanced Configuration Snippet (Safety Valve)",
        "name": "log4j_safety_valve",
        "value": null
    },
    {
        "desc": "When the ratio of number stale DataNodes to total DataNodes marked is greater than this ratio, permit writing to stale nodes to prevent causing hotspots.",
        "display_name": "Write Stale DataNode Ratio",
        "name": "dfs_namenode_write_stale_datanode_ratio",
        "value": "0.5"
    },
    {
        "desc": "The health test thresholds for the weighted average extra time the pause monitor spent paused. Specified as a percentage of elapsed wall clock time.",
        "display_name": "Pause Duration Thresholds",
        "name": "namenode_pause_duration_thresholds",
        "value": "{\"critical\":\"60.0\",\"warning\":\"30.0\"}"
    },
    {
        "desc": "When set, Cloudera Manager will send alerts when the health of this role reaches the threshold specified by the EventServer setting eventserver_health_events_alert_threshold",
        "display_name": "Enable Health Alerts for this Role",
        "name": "enable_alerts",
        "value": "true"
    },
    {
        "desc": "Enables the health test that the NameNode's process state is consistent with the role configuration",
        "display_name": "NameNode Process Health Test",
        "name": "namenode_scm_health_enabled",
        "value": "true"
    },
    {
        "desc": "When set, generates heap dump file when java.lang.OutOfMemoryError is thrown.",
        "display_name": "Dump Heap When Out of Memory",
        "name": "oom_heap_dump_enabled",
        "value": "false"
    },
    {
        "desc": "Enables the health test of the rolling metadata upgrade status of the NameNode. This covers rolling metadata upgrades. Nonrolling metadata upgrades are covered in a separate health test.",
        "display_name": "HDFS Rolling Metadata Upgrade Status Health Test",
        "name": "namenode_rolling_upgrade_status_enabled",
        "value": "true"
    },
    {
        "desc": "Mountpoints that are mapped to this NameNode's Nameservice.",
        "display_name": "Mountpoints",
        "name": "nameservice_mountpoints",
        "value": "/"
    },
    {
        "desc": "The health test thresholds of the number of file descriptors used. Specified as a percentage of file descriptor limit.",
        "display_name": "File Descriptor Monitoring Thresholds",
        "name": "namenode_fd_thresholds",
        "value": "{\"critical\":\"70.0\",\"warning\":\"50.0\"}"
    },
    {
        "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Log Directory Free Space Monitoring Absolute Thresholds setting is configured.",
        "display_name": "Log Directory Free Space Monitoring Percentage Thresholds",
        "name": "log_directory_free_space_percentage_thresholds",
        "value": "{\"critical\":\"never\",\"warning\":\"never\"}"
    },
    {
        "desc": "<p>This file contains the rules which govern how log messages are turned into events by the custom log4j appender that this role loads. It is in JSON format, and is composed of a list of rules. Every log message is evaluated against each of these rules in turn to decide whether or not to send an event for that message.</p><p>Each rule has some or all of the following fields:</p><ul><li><span class='code'>alert</span> - whether or not events generated from this rule should be promoted to alerts. A value of \"true\" will cause alerts to be generated. If not specified, the default is \"false\".</li><li><span class='code'>rate</span> <strong>(mandatory)</strong> - the maximum number of log messages matching this rule that may be sent as events every minute. If more than <tt>rate</tt> matching log messages are received in a single minute, the extra messages are ignored. If rate is less than 0, the number of messages per minute is unlimited.</li><li><span class='code'>periodminutes</span> - the number of minutes during which the publisher will only publish <tt>rate</tt> events or fewer. If not specified, the default is <strong>one minute</strong></li><li><span class='code'>threshold</span> - apply this rule only to messages with this log4j severity level or above. An example is \"WARN\" for warning level messages or higher.</li><li><span class='code'>content</span> - match only those messages whose contents match this regular expression.</li><li><span class='code'>exceptiontype</span> - match only those messages which are part of an exception message. The exception type must match this regular expression.</li></ul><br/><p>Example:<span class='code'>{\"alert\": false, \"rate\": 10, \"exceptiontype\": \"java.lang.StringIndexOutOfBoundsException\"}</span></p><p>This rule will send events to Cloudera Manager for every <span class='code'>StringIndexOutOfBoundsException</span>, up to a maximum of 10 every minute.</p>",
        "display_name": "Rules to Extract Events from Log Files",
        "name": "log_event_whitelist",
        "value": "{\n \"version\": \"0\",\n \"rules\": [\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"FATAL\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Instead, use .*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\": \".* is deprecated. Use .* instead\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.IOException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.net.SocketClosedException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.io.EOFException\"},\n {\"alert\": false, \"rate\": 0, \"exceptiontype\": \"java.nio.channels.CancelledKeyException\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 2, \"exceptiontype\": \".*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Unknown job [^ ]+ being deleted.*\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"Error executing shell command .+ No such process.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\".*attempt to override final parameter.+\"},\n {\"alert\": false, \"rate\": 0, \"threshold\":\"WARN\", \"content\":\"[^ ]+ is a deprecated filesystem name. Use.*\"},\n {\"alert\": false, \"rate\": 1, \"periodminutes\": 1, \"threshold\":\"WARN\"},\n {\"alert\": false, \"rate\": 1, \"threshold\":\"INFO\", \"content\":\"Triggering checkpoint.*\"}\n ]\n}\n"
    },
    {
        "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's heap dump directory.",
        "display_name": "Heap Dump Directory Free Space Monitoring Absolute Thresholds",
        "name": "heap_dump_directory_free_space_absolute_thresholds",
        "value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
    },
    {
        "desc": "Maximum size in bytes for the Java Process heap memory. Passed to Java -Xmx.",
        "display_name": "Java Heap Size of Namenode in Bytes",
        "name": "namenode_java_heapsize",
        "value": "1073741824"
    },
    {
        "desc": "The maximum size, in megabytes, per log file for NameNode logs. Typically used by log4j or logback.",
        "display_name": "NameNode Max Log Size",
        "name": "max_log_size",
        "value": "200"
    },
    {
        "desc": "Determines where on the local file system the NameNode should store the name table (fsimage). For redundancy, enter a comma-delimited list of directories to replicate the name table in all of the directories. Typical values are /data/N/dfs/nn where N=1..3.",
        "display_name": "NameNode Data Directories",
        "name": "dfs_name_dir_list",
        "value": null
    },
    {
        "desc": "Default time interval for marking a DataNode as \"stale\". If the NameNode has not received heartbeat messages from a DataNode for more than this time interval, the DataNode is marked and treated as \"stale\" by default.",
        "display_name": "Stale DataNode Time Interval",
        "name": "dfs_namenode_stale_datanode_interval",
        "value": "30000"
    },
    {
        "desc": "The port where the NameNode runs the HDFS protocol. Combined with the NameNode's hostname to build its address.",
        "display_name": "NameNode Port",
        "name": "namenode_port",
        "value": "8020"
    },
    {
        "desc": "Advanced Configuration Snippet (Safety Valve) for Hadoop Metrics2. Properties will be inserted into <strong>hadoop-metrics2.properties</strong>.",
        "display_name": "Hadoop Metrics2 Advanced Configuration Snippet (Safety Valve)",
        "name": "hadoop_metrics2_safety_valve",
        "value": null
    },
    {
        "desc": "The amount of stacks data that is retained. After the retention limit is reached, the oldest data is deleted.",
        "display_name": "Stacks Collection Data Retention",
        "name": "stacks_collection_data_retention",
        "value": "104857600"
    },
    {
        "desc": "The period to review when computing the moving average of the NameNode's RPC latency.",
        "display_name": "NameNode RPC Latency Monitoring Window",
        "name": "namenode_rpc_latency_window",
        "value": "5"
    },
    {
        "desc": "Optional port for the service-rpc address which can be used by HDFS daemons instead of sharing the RPC address used by the clients.",
        "display_name": "NameNode Service RPC Port",
        "name": "dfs_namenode_servicerpc_address",
        "value": null
    },
    {
        "desc": "The health check thresholds for the number of out-of-sync JournalNodes for this NameNode.",
        "display_name": "NameNode Out-Of-Sync JournalNodes Thresholds",
        "name": "namenode_out_of_sync_journal_nodes_thresholds",
        "value": "{\"critical\":\"any\",\"warning\":\"never\"}"
    },
    {
        "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's Determines where on the local file system the NameNode should store the name table (fsimage). For redundancy, enter a comma-delimited list of directories to replicate the name table in all of the directories. Typical values are /data/N/dfs/nn where N=1..3..",
        "display_name": "NameNode Data Directories Free Space Monitoring Absolute Thresholds",
        "name": "namenode_data_directories_free_space_absolute_thresholds",
        "value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
    },
    {
        "desc": "The absolute maximum number of outgoing replication threads a given node can have at one time. The regular limit (dfs.namenode.replication.max-streams) is waived for highest-priority block replications. Highest replication priority is for blocks that are at a very high risk of loss if the disk or server on which they remain fails. These are usually blocks with only one copy, or blocks with zero live copies but a copy in a node being decommissioned. dfs.namenode.replication.max-streams-hard-limit provides a limit on the total number of outgoing replication threads, including threads of all priorities.",
        "display_name": "Hard Limit on the Number of Replication Threads on a Datanode",
        "name": "dfs_namenode_replication_max_streams_hard_limit",
        "value": "40"
    },
    {
        "desc": "For advanced use only, key-value pairs (one on each line) to be inserted into a role's environment. Applies to configurations of this role except client configuration.",
        "display_name": "NameNode Environment Advanced Configuration Snippet (Safety Valve)",
        "name": "NAMENODE_role_env_safety_valve",
        "value": null
    },
    {
        "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's heap dump directory. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Heap Dump Directory Free Space Monitoring Absolute Thresholds setting is configured.",
        "display_name": "Heap Dump Directory Free Space Monitoring Percentage Thresholds",
        "name": "heap_dump_directory_free_space_percentage_thresholds",
        "value": "{\"critical\":\"never\",\"warning\":\"never\"}"
    },
    {
        "desc": "Directory where NameNode will place its log files.",
        "display_name": "NameNode Log Directory",
        "name": "namenode_log_dir",
        "value": "/var/log/hadoop-hdfs"
    },
    {
        "desc": "Number of CPU shares to assign to this role. The greater the number of shares, the larger the share of the host's CPUs that will be given to this role when the host experiences CPU contention. Must be between 2 and 262144. Defaults to 1024 for processes not managed by Cloudera Manager.",
        "display_name": "Cgroup CPU Shares",
        "name": "rm_cpu_shares",
        "value": "1024"
    },
    {
        "desc": "Enables the health test of the metadata upgrade status of the NameNode. This covers nonrolling metadata upgrades. Rolling metadata upgrades are covered in a separate health test.",
        "display_name": "HDFS Metadata Upgrade Status Health Test",
        "name": "namenode_upgrade_status_enabled",
        "value": "true"
    },
    {
        "desc": "Nameservice of this NameNode. The Nameservice represents the interface to this NameNode and its High Availability partner. The Nameservice also represents the namespace associated with a federated NameNode.",
        "display_name": "NameNode Nameservice",
        "name": "dfs_federation_namenode_nameservice",
        "value": null
    },
    {
        "desc": "The minimum log level for NameNode logs",
        "display_name": "NameNode Logging Threshold",
        "name": "log_threshold",
        "value": "INFO"
    },
    {
        "desc": "Specifies the percentage of blocks that should satisfy the minimal replication requirement defined by dfs.replication.min. Enter a value less than or equal to 0 to wait for any particular percentage of blocks before exiting safemode. Values greater than 1 will make safemode permanent.",
        "display_name": "Safemode Threshold Percentage",
        "name": "dfs_safemode_threshold_pct",
        "value": "0.999"
    },
    {
        "desc": "Directories on the local file system to store the NameNode edits. If not set, the edits are stored in the NameNode's Data Directories. The value of this configuration is automatically generated to be the Quorum-based Storage URI if there are JournalNodes and this NameNode is not Highly Available.",
        "display_name": "NameNode Edits Directories",
        "name": "dfs_namenode_edits_dir",
        "value": null
    },
    {
        "desc": "If set to false and if one of the replicas of the NameNode storage fails, such as temporarily failure of NFS, this directory is not used until the NameNode restarts. If enabled, failed storage is re-checked on every checkpoint and, if it becomes valid, the NameNode will try to restore the edits and fsimage.",
        "display_name": "Restore NameNode Directories at Checkpoint Time",
        "name": "dfs_name_dir_restore",
        "value": "false"
    },
    {
        "desc": "The amount of time allowed after this role is started that failures of health checks that rely on communication with this role will be tolerated.",
        "display_name": "Health Check Startup Tolerance",
        "name": "namenode_startup_tolerance",
        "value": "5"
    },
    {
        "desc": "These arguments will be passed as part of the Java command line. Commonly, garbage collection flags or extra debugging flags would be passed here.",
        "display_name": "Java Configuration Options for NameNode",
        "name": "namenode_java_opts",
        "value": "-XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+CMSParallelRemarkEnabled"
    },
    {
        "desc": "Enable Automatic Failover to maintain High Availability. Requires a ZooKeeper service and a High Availability NameNode partner.",
        "display_name": "Enable Automatic Failover",
        "name": "autofailover_enabled",
        "value": "false"
    },
    {
        "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's Determines where on the local file system the NameNode should store the name table (fsimage). For redundancy, enter a comma-delimited list of directories to replicate the name table in all of the directories. Typical values are /data/N/dfs/nn where N=1..3.. Specified as a percentage of the capacity on that filesystem. This setting is not used if a Determines where on the local file system the NameNode should store the name table (fsimage). For redundancy, enter a comma-delimited list of directories to replicate the name table in all of the directories. Typical values are /data/N/dfs/nn where N=1..3. Free Space Monitoring Absolute Thresholds setting is configured.",
        "display_name": "NameNode Data Directories Free Space Monitoring Percentage Thresholds",
        "name": "namenode_data_directories_free_space_percentage_thresholds",
        "value": "{\"critical\":\"never\",\"warning\":\"never\"}"
    },
    {
        "desc": "For advanced use only, a string to be inserted into <strong>dfs_hosts_exclude.txt</strong> for this role only.",
        "display_name": "NameNode Advanced Configuration Snippet (Safety Valve) for dfs_hosts_exclude.txt",
        "name": "namenode_hosts_exclude_safety_valve",
        "value": null
    },
    {
        "desc": "The health test thresholds for unexpected exits encountered within a recent period specified by the unexpected_exits_window configuration for the role.",
        "display_name": "Unexpected Exits Thresholds",
        "name": "unexpected_exits_thresholds",
        "value": "{\"critical\":\"any\",\"warning\":\"never\"}"
    },
    {
        "desc": "When set, this role's process is automatically (and transparently) restarted in the event of an unexpected failure.",
        "display_name": "Automatically Restart Process",
        "name": "process_auto_restart",
        "value": "false"
    },
    {
        "desc": "This determines the percentage amount of block invalidations (deletes) to do over a single DataNode heartbeat deletion command. The final deletion count is determined by applying this percentage to the number of live nodes in the system. The resultant number is the number of blocks from the deletion list chosen for proper invalidation over a single heartbeat of a single DataNode.",
        "display_name": "Invalidate Work Percentage Per Iteration",
        "name": "dfs_namenode_invalidate_work_pct_per_iteration",
        "value": "0.32"
    },
    {
        "desc": "Enables the health test that the Cloudera Manager Agent can successfully contact and gather metrics from the web server.",
        "display_name": "Web Metric Collection",
        "name": "namenode_web_metric_collection_enabled",
        "value": "true"
    },
    {
        "desc": "Hard memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
        "display_name": "Cgroup Memory Hard Limit",
        "name": "rm_memory_hard_limit",
        "value": "-1"
    },
    {
        "desc": "The health test thresholds on the swap memory usage of the process.",
        "display_name": "Process Swap Memory Thresholds",
        "name": "process_swap_memory_thresholds",
        "value": "{\"critical\":\"never\",\"warning\":\"any\"}"
    },
    {
        "desc": "The time between two periodic file system checkpoints.",
        "display_name": "Filesystem Checkpoint Period",
        "name": "fs_checkpoint_period",
        "value": "3600"
    },
    {
        "desc": "The health test thresholds for monitoring of free space on the filesystem that contains this role's log directory.",
        "display_name": "Log Directory Free Space Monitoring Absolute Thresholds",
        "name": "log_directory_free_space_absolute_thresholds",
        "value": "{\"critical\":\"5.36870912E9\",\"warning\":\"1.073741824E10\"}"
    },
    {
        "desc": "When computing the overall NameNode health, consider the host's health.",
        "display_name": "NameNode Host Health Test",
        "name": "namenode_host_health_enabled",
        "value": "true"
    },
    {
        "desc": "The access time for HDFS file is precise upto this value. Setting the value of 0 disables access times for HDFS. When using the NFS Gateway role, make sure this property is enabled.",
        "display_name": "Access Time Precision",
        "name": "dfs_access_time_precision",
        "value": "3600000"
    },
    {
        "desc": "Minimum number of running threads for the Hue Thrift server running on the NameNode",
        "display_name": "Hue Thrift Server Min Threadcount",
        "name": "dfs_thrift_threads_min",
        "value": "10"
    },
    {
        "desc": "The number of transactions after which the NameNode or SecondaryNameNode will create a checkpoint of the namespace, regardless of whether the checkpoint period has expired.",
        "display_name": "Filesystem Checkpoint Transaction Threshold",
        "name": "fs_checkpoint_txns",
        "value": "1000000"
    },
    {
        "desc": "Maximum number of running threads for the Hue Thrift server running on the NameNode",
        "display_name": "Hue Thrift Server Max Threadcount",
        "name": "dfs_thrift_threads_max",
        "value": "20"
    },
    {
        "desc": "Full path to a custom topology script on the host file system. The topology script is used to determine the rack location of nodes. If left blank, a topology script will be provided that uses your hosts' rack information, visible in the \"Hosts\" page.",
        "display_name": "Topology Script File Name",
        "name": "topology_script_file_name",
        "value": null
    },
    {
        "desc": "Path to directory where heap dumps are generated when java.lang.OutOfMemoryError error is thrown. This directory is automatically created if it does not exist. If this directory already exists, role user must have write access to this directory. If this directory is shared among multiple roles, it should have 1777 permissions. The heap dump files are created with 600 permissions and are owned by the role user. The amount of free space in this directory should be greater than the maximum Java Process heap size configured for this role.",
        "display_name": "Heap Dump Directory",
        "name": "oom_heap_dump_dir",
        "value": "/tmp"
    },
    {
        "desc": "The frequency with which stacks are collected.",
        "display_name": "Stacks Collection Frequency",
        "name": "stacks_collection_frequency",
        "value": "5.0"
    },
    {
        "desc": "Indicate whether or not to avoid writing to stale DataNodes for which heartbeat messages have not been received by the NameNode for more than Stale DataNode Time Interval. Writes avoid using stale DataNodes unless more than a configured ratio (dfs.namenode.write.stale.datanode.ratio) of DataNodes are marked as stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting for reads.",
        "display_name": "Avoid Writing Stale DataNode",
        "name": "dfs_namenode_avoid_write_stale_datanode",
        "value": "false"
    },
    {
        "desc": "The health test thresholds of failed status directories in a NameNode.",
        "display_name": "NameNode Directory Failures Thresholds",
        "name": "namenode_directory_failures_thresholds",
        "value": "{\"critical\":\"any\",\"warning\":\"never\"}"
    },
    {
        "desc": "Number of minutes between trash checkpoints. Also controls the number of minutes after which a trash checkpoint directory is deleted. To disable the trash feature, enter 0.",
        "display_name": "Filesystem Trash Interval",
        "name": "fs_trash_interval",
        "value": "1440"
    },
    {
        "desc": "For advanced use only, a string to be inserted into <strong>hdfs-site.xml</strong> for this role only.",
        "display_name": "NameNode Advanced Configuration Snippet (Safety Valve) for hdfs-site.xml",
        "name": "namenode_config_safety_valve",
        "value": null
    },
    {
        "desc": "This determines the total amount of block transfers to begin in parallel at a DataNode for replication, when such a command list is being sent over a DataNode heartbeat by the NameNode. The actual number is obtained by multiplying this value by the total number of live nodes in the cluster. The result number is the number of blocks to transfer immediately, per DataNode heartbeat.",
        "display_name": "Replication Work Multiplier Per Iteration",
        "name": "dfs_namenode_replication_work_multiplier_per_iteration",
        "value": "10"
    },
    {
        "desc": "The maximum number of rolled log files to keep for NameNode logs. Typically used by log4j or logback.",
        "display_name": "NameNode Maximum Log File Backups",
        "name": "max_log_backup_index",
        "value": "10"
    },
    {
        "desc": "Determines extension of safemode in milliseconds after the threshold level is reached.",
        "display_name": "Safemode Extension",
        "name": "dfs_safemode_extension",
        "value": "30000"
    },
    {
        "desc": "Timeout in seconds for the Hue Thrift server running on the NameNode",
        "display_name": "Hue Thrift Server Timeout",
        "name": "dfs_thrift_timeout",
        "value": "60"
    },
    {
        "desc": "Soft memory limit to assign to this role, enforced by the Linux kernel. When the limit is reached, the kernel will reclaim pages charged to the process if and only if the host is facing memory pressure. If reclaiming fails, the kernel may kill the process. Both anonymous as well as page cache pages contribute to the limit. Use a value of -1 B to specify no limit. By default processes not managed by Cloudera Manager will have no limit.",
        "display_name": "Cgroup Memory Soft Limit",
        "name": "rm_memory_soft_limit",
        "value": "-1"
    },
    {
        "desc": "The number of server threads for the NameNode used for service calls. Only used when NameNode Service RPC Port is configured.",
        "display_name": "NameNode Service Handler Count",
        "name": "dfs_namenode_service_handler_count",
        "value": "30"
    },
    {
        "desc": "The health test thresholds of the number of transactions since the last HDFS namespace checkpoint. Specified as a percentage of the configured checkpointing transaction limit.",
        "display_name": "Filesystem Checkpoint Transactions Monitoring Thresholds",
        "name": "namenode_checkpoint_transactions_thresholds",
        "value": "{\"critical\":\"400.0\",\"warning\":\"200.0\"}"
    },
    {
        "desc": "The health test thresholds of the age of the HDFS namespace checkpoint. Specified as a percentage of the configured checkpoint interval.",
        "display_name": "Filesystem Checkpoint Age Monitoring Thresholds",
        "name": "namenode_checkpoint_age_thresholds",
        "value": "{\"critical\":\"400.0\",\"warning\":\"200.0\"}"
    },
    {
        "desc": "When set, a SIGKILL signal is sent to the role process when java.lang.OutOfMemoryError is thrown.",
        "display_name": "Kill When Out of Memory",
        "name": "oom_sigkill_enabled",
        "value": "true"
    },
    {
        "desc": "The base port where the secure NameNode web UI listens.",
        "display_name": "Secure NameNode Web UI Port (SSL)",
        "name": "dfs_https_port",
        "value": "50470"
    },
    {
        "desc": "For advanced use only, a string to be inserted into <strong>dfs_hosts_allow.txt</strong> for this role only.",
        "display_name": "NameNode Advanced Configuration Snippet (Safety Valve) for dfs_hosts_allow.txt",
        "name": "namenode_hosts_allow_safety_valve",
        "value": null
    },
    {
        "desc": "<p>The configured triggers for this role. This is a JSON formatted list of triggers. These triggers are evaluated as part as the health system. Every trigger expression is parsed, and if the trigger condition is met, the list of actions provided in the trigger expression is executed.</p><p>Each trigger has all of the following fields:</p><ul><li><code>triggerName</code> <strong>(mandatory)</strong> - The name of the trigger. This value must be unique for the specific role. </li><li><code>triggerExpression</code> <strong>(mandatory)</strong> - A tsquery expression representing the trigger. </li><li><code>streamThreshold</code> <strong>(optional)</strong> - The maximum number of streams that can satisfy a condition of a trigger before the condition fires. By default set to 0, and any stream returned causes the condition to fire. </li><li><code>enabled</code> <strong> (optional)</strong> - By default set to 'true'. If set to 'false', the trigger will not be evaluated.</li><li><code>expressionEditorConfig</code> <strong> (optional)</strong> - Metadata for the trigger editor. If present, the trigger should only be edited from the Edit Trigger page; editing the trigger here may lead to inconsistencies.</li></ul></p><p>For example, the following JSON formatted trigger configured for a DataNode fires if the DataNode has more than 1500 file-descriptors opened:</p><p><pre>[{\"triggerName\": \"sample-trigger\",\n \"triggerExpression\": \"IF (SELECT fd_open WHERE roleName=$ROLENAME and last(fd_open) > 1500) DO health:bad\",\n \"streamThreshold\": 0, \"enabled\": \"true\"}]</pre></p><p>See the trigger rules documentation for more details on how to write triggers using tsquery.</p><p>The JSON format is evolving and may change in the future and, as a result, backward compatibility is not guaranteed between releases at this time.</p>",
        "display_name": "Role Triggers",
        "name": "role_triggers",
        "value": "[]"
    },
    {
        "desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.",
        "display_name": "Enable Configuration Change Alerts",
        "name": "enable_config_alerts",
        "value": "false"
    },
    {
        "desc": "If enabled, the NameNode binds to the wildcard address (\"0.0.0.0\") on all of its ports.",
        "display_name": "Bind NameNode to Wildcard Address",
        "name": "namenode_bind_wildcard",
        "value": "false"
    },
    {
        "desc": "Comma-separated list of NameNode plug-ins to be activated. If one plug-in cannot be loaded, all the plug-ins are ignored.",
        "display_name": "NameNode Plugins",
        "name": "dfs_namenode_plugins_list",
        "value": ""
    },
    {
        "desc": "Directory on a shared storage device, such as a Quorum-based Storage URI or a local directory that is an NFS mount from a NAS, to store the NameNode edits. The value of this configuration is automatically generated to be the Quourm Journal URI if there are JournalNodes and this NameNode is Highly Available.",
        "display_name": "Shared Edits Directory",
        "name": "dfs_namenode_shared_edits_dir",
        "value": null
    },
    {
        "desc": "The directory in which stacks logs are placed. If not set, stacks are logged into a <span class='code'>stacks</span> subdirectory of the role's log directory.",
        "display_name": "Stacks Collection Directory",
        "name": "stacks_collection_directory",
        "value": null
    },
    {
        "desc": "Weight for the read I/O requests issued by this role. The greater the weight, the higher the priority of the requests when the host experiences I/O contention. Must be between 100 and 1000. Defaults to 1000 for processes not managed by Cloudera Manager.",
        "display_name": "Cgroup I/O Weight",
        "name": "rm_io_weight",
        "value": "500"
    },
    {
        "desc": "The number of server threads for the NameNode.",
        "display_name": "NameNode Handler Count",
        "name": "dfs_namenode_handler_count",
        "value": "30"
    },
    {
        "desc": "If configured, overrides the process soft and hard rlimits (also called ulimits) for file descriptors to the configured value.",
        "display_name": "Maximum Process File Descriptors",
        "name": "rlimit_fds",
        "value": null
    },
    {
        "desc": "Specifies the number of DataNodes that must be live before the name node exits safemode. Enter a value less than or equal to 0 to take the number of live DataNodes into account when deciding whether to remain in safemode during startup. Values greater than the number of DataNodes in the cluster will make safemode permanent.",
        "display_name": "Safemode Minimum DataNodes",
        "name": "dfs_safemode_min_datanodes",
        "value": "0"
    }
]
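
Each entry in the descriptor above pairs a Cloudera Manager configuration name ("name") with its default ("value"), a description ("desc"), and a label ("display_name"). As a minimal sketch of how such a descriptor can be consumed, assuming a hypothetical helper and file path that are not part of this change, the following Python builds a name-to-default map and looks up two of the ports defined above:

import json


def load_config_defaults(path):
    # Read a Cloudera-style config descriptor (a JSON array of objects with
    # "name", "value", "desc", "display_name") and return {name: default}.
    with open(path) as f:
        entries = json.load(f)
    # Keep only entries that actually ship a default; null means "unset".
    return {e["name"]: e["value"] for e in entries if e["value"] is not None}


if __name__ == "__main__":
    # The path is illustrative; point it at wherever this descriptor is stored.
    defaults = load_config_defaults("hdfs-namenode.json")
    print(defaults["dfs_http_port"])   # -> "50070"
    print(defaults["namenode_port"])   # -> "8020"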