[ { "desc": "For MapReduce job outputs that are compressed, specify the compression codec to use. Will be part of generated client configuration.", "display_name": "Compression Codec of MapReduce Job Output", "name": "mapred_output_compression_codec", "value": "org.apache.hadoop.io.compress.DefaultCodec" }, { "desc": "The default number of parallel transfers run by reduce during the copy (shuffle) phase. This number should be between sqrt(nodes*number_of_map_slots_per_node) and nodes*number_of_map_slots_per_node/2. Will be part of generated client configuration.", "display_name": "Default Number of Parallel Transfers During Shuffle", "name": "mapred_reduce_parallel_copies", "value": "10" }, { "desc": "Whether Map tasks should attempt to use the optimized native implementation of the map-side output collector. This can improve performance of many jobs which are shuffle-intensive, though is experimental in CDH 5.2.", "display_name": "Enable Optimized Map-side Output Collector", "name": "mapreduce_enable_native_map_output_collector", "value": "false" }, { "desc": "The directory where the client configs will be deployed", "display_name": "Deploy Directory", "name": "client_config_root_dir", "value": "/etc/hadoop" }, { "desc": "The number of streams to merge at the same time while sorting files. That is, the number of sort heads to use during the merge sort on the reducer side. This determines the number of open file handles. Merging more files in parallel reduces merge sort iterations and improves run time by eliminating disk I/O. Note that merging more files in parallel uses more memory. If 'io.sort.factor' is set too high or the maximum JVM heap is set too low, excessive garbage collection will occur. The Hadoop default is 10, but Cloudera recommends a higher value. Will be part of generated client configuration.", "display_name": "I/O Sort Factor", "name": "io_sort_factor", "value": "64" }, { "desc": "The priority level that the client configuration will have in the Alternatives system on the hosts. Higher priority levels will cause Alternatives to prefer this configuration over any others.", "display_name": "Alternatives Priority", "name": "client_config_priority", "value": "92" }, { "desc": "Location to store the job history files of running jobs. This is a path on the host where the JobTracker is running.", "display_name": "Running Job History Location", "name": "hadoop_job_history_dir", "value": "/var/log/hadoop-mapreduce/history" }, { "desc": "The number of virtual CPU cores allocated for each map task of a job. This parameter has no effect prior to CDH 4.4.", "display_name": "Map Task CPU Virtual Cores", "name": "mapreduce_map_cpu_vcores", "value": "1" }, { "desc": "If enabled, multiple instances of some reduce tasks may be executed in parallel.", "display_name": "Reduce Tasks Speculative Execution", "name": "mapred_reduce_tasks_speculative_execution", "value": "false" }, { "desc": "The application framework to run jobs with. If not set, jobs will be run with the local job runner.", "display_name": "Application Framework", "name": "mapreduce_framework_name", "value": "yarn" }, { "desc": "Base sleep time between failover attempts. 
Used only if RM HA is enabled.", "display_name": "Client Failover Sleep Base Time", "name": "client_failover_sleep_base", "value": "100" }, { "desc": "For advanced use only, a string to be inserted into the client configuration for yarn-site.xml.", "display_name": "YARN Client Advanced Configuration Snippet (Safety Valve) for yarn-site.xml", "name": "yarn_client_config_safety_valve", "value": null }, { "desc": "Size of buffer for read and write operations of SequenceFiles.", "display_name": "SequenceFile I/O Buffer Size", "name": "io_file_buffer_size", "value": "65536" }, { "desc": "Fraction of the number of map tasks in the job which should be completed before reduce tasks are scheduled for the job.", "display_name": "Number of Map Tasks to Complete Before Reduce Tasks", "name": "mapred_reduce_slowstart_completed_maps", "value": "0.8" }, { "desc": "For advanced use only, a string to be inserted into the client configuration for mapred-site.xml.", "display_name": "MapReduce Client Advanced Configuration Snippet (Safety Valve) for mapred-site.xml", "name": "mapreduce_client_config_safety_valve", "value": null }, { "desc": "The maximum heap size, in bytes, of the Java MapReduce ApplicationMaster. This number will be formatted and concatenated with 'ApplicationMaster Java Opts Base' to pass to Hadoop.", "display_name": "ApplicationMaster Java Maximum Heap Size", "name": "yarn_app_mapreduce_am_max_heap", "value": "825955249" }, { "desc": "The replication level for submitted job files.", "display_name": "Mapreduce Submit Replication", "name": "mapred_submit_replication", "value": "10" }, { "desc": "The total amount of memory buffer, in megabytes, to use while sorting files. Note that this memory comes out of the user JVM heap size (meaning total user JVM heap - this amount of memory = total user usable heap space. Note that Cloudera's default differs from Hadoop's default; Cloudera uses a bigger buffer by default because modern machines often have more RAM. The smallest value across all TaskTrackers will be part of generated client configuration.", "display_name": "I/O Sort Memory Buffer (MiB)", "name": "io_sort_mb", "value": "256" }, { "desc": "Whether to enable ubertask optimization, which runs \"sufficiently small\" jobs sequentially within a single JVM. \"Small\" is defined by the mapreduce.job.ubertask.maxmaps, mapreduce.job.ubertask.maxreduces, and mapreduce.job.ubertask.maxbytes settings.", "display_name": "Enable Ubertask Optimization", "name": "mapreduce_job_ubertask_enabled", "value": "false" }, { "desc": "Java command line arguments passed to the MapReduce ApplicationMaster.", "display_name": "ApplicationMaster Java Opts Base", "name": "yarn_app_mapreduce_am_command_opts", "value": "-Djava.net.preferIPv4Stack=true" }, { "desc": "The amount of physical memory, in MiB, allocated for each reduce task of a job. This parameter has no effect prior to CDH 4.4.", "display_name": "Reduce Task Memory", "name": "mapreduce_reduce_memory_mb", "value": "1024" }, { "desc": "The maximum permissible size of the split metainfo file. The JobTracker won't attempt to read split metainfo files bigger than the configured value. No limits if set to -1.", "display_name": "JobTracker MetaInfo Maxsize", "name": "mapreduce_jobtracker_split_metainfo_maxsize", "value": "10000000" }, { "desc": "Compression level for the codec used to compress MapReduce outputs. 
Default compression is a balance between speed and compression ratio.", "display_name": "Compression Level of Codecs", "name": "zlib_compress_level", "value": "DEFAULT_COMPRESSION" }, { "desc": "A shared directory for temporary files.", "display_name": "Shared Temp Directories", "name": "mapreduce_cluster_temp_dir", "value": null }, { "desc": "These are Java command line arguments. Commonly, garbage collection flags or extra debugging flags would be passed here.", "display_name": "Client Java Configuration Options", "name": "mapreduce_client_java_opts", "value": "-Djava.net.preferIPv4Stack=true" }, { "desc": "The physical memory requirement, in MiB, for the ApplicationMaster.", "display_name": "ApplicationMaster Memory", "name": "yarn_app_mapreduce_am_resource_mb", "value": "1024" }, { "desc": "The maximum Java heap size, in bytes, of the reduce processes. This number will be formatted and concatenated with 'Reduce Task Java Opts Base' to pass to Hadoop.", "display_name": "Reduce Task Maximum Heap Size", "name": "mapreduce_reduce_java_opts_max_heap", "value": "825955249" }, { "desc": "Maximum size in bytes for the Java process heap memory. Passed to Java -Xmx.", "display_name": "Client Java Heap Size in Bytes", "name": "mapreduce_client_java_heapsize", "value": "825955249" }, { "desc": "The number of milliseconds before a task will be terminated if it neither reads an input, writes an output, nor updates its status string.", "display_name": "Mapreduce Task Timeout", "name": "mapred_task_timeout", "value": "600000" }, { "desc": "The virtual CPU cores requirement, for the ApplicationMaster. This parameter has no effect prior to CDH 4.4.", "display_name": "ApplicationMaster Virtual CPU Cores", "name": "yarn_app_mapreduce_am_resource_cpu_vcores", "value": "1" }, { "desc": "The amount of physical memory, in MiB, allocated for each map task of a job.", "display_name": "Map Task Memory", "name": "mapreduce_map_memory_mb", "value": "1024" }, { "desc": "Threshold for number of reduces, beyond which a job is considered too big for ubertask optimization. Note: As of CDH 5, MR2 does not support more than one reduce in an ubertask. (Zero is valid.)", "display_name": "Ubertask Maximum Reduces", "name": "mapreduce_job_ubertask_maxreduces", "value": "1" }, { "desc": "For MapReduce map outputs that are compressed, specify the compression codec to use. Will be part of generated client configuration.", "display_name": "Compression Codec of MapReduce Map Output", "name": "mapred_map_output_compression_codec", "value": "org.apache.hadoop.io.compress.SnappyCodec" }, { "desc": "Classpaths to include for MapReduce applications.", "display_name": "MR Application Classpath", "name": "mapreduce_application_classpath", "value": "$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$MR2_CLASSPATH" }, { "desc": "When set, each role identifies important log events and forwards them to Cloudera Manager.", "display_name": "Enable Log Event Capture", "name": "catch_events", "value": "true" }, { "desc": "Maximum sleep time between failover attempts. Used only if RM HA is enabled.", "display_name": "Client Failover Sleep Max Time", "name": "client_failover_sleep_max", "value": "2000" }, { "desc": "Limit on the number of counters allowed per job.", "display_name": "Job Counters Limit", "name": "mapreduce_job_counters_limit", "value": "120" }, { "desc": "Compress the output of MapReduce jobs. 
Will be part of generated client configuration.", "display_name": "Compress MapReduce Job Output", "name": "mapred_output_compress", "value": "false" }, { "desc": "For MapReduce job outputs that are compressed as SequenceFiles, you can select one of these compression type options: NONE, RECORD or BLOCK. Cloudera recommends BLOCK. Will be part of generated client configuration.", "display_name": "Compression Type of MapReduce Job Output", "name": "mapred_output_compression_type", "value": "BLOCK" }, { "desc": "Java opts for the map processes. The following symbol, if present, will be interpolated: @taskid@ is replaced by current TaskID. Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in /tmp pass a value of: \"-verbose:gc -Xloggc:/tmp/@taskid@.gc\". The configuration variable 'Map Task Memory' can be used to control the maximum memory of the map processes.", "display_name": "Map Task Java Opts Base", "name": "mapreduce_map_java_opts", "value": "-Djava.net.preferIPv4Stack=true" }, { "desc": "Additional execution environment entries for map and reduce task processes.", "display_name": "MR Application Environment", "name": "mapreduce_admin_user_env", "value": "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH" }, { "desc": "The soft limit in either the buffer or record collection buffers. When this limit is reached, a thread will begin to spill the contents to disk in the background. Note that this does not imply any chunking of data to the spill. A value less than 0.5 is not recommended. The syntax is in decimal units; the default is 80% and is formatted 0.8. Will be part of generated client configuration.", "display_name": "I/O Sort Spill Percent", "name": "io_sort_spill_percent", "value": "0.8" }, { "desc": "The default number of reduce tasks per job. Will be part of generated client configuration.", "display_name": "Default Number of Reduce Tasks per Job", "name": "mapred_reduce_tasks", "value": "1" }, { "desc": "Maximum allowed connections for the shuffle. Set to 0 (zero) to indicate no limit on the number of connections.", "display_name": "Max Shuffle Connections", "name": "mapreduce_shuffle_max_connections", "value": "80" }, { "desc": "The maximum Java heap size, in bytes, of the map processes. This number will be formatted and concatenated with 'Map Task Java Opts Base' to pass to Hadoop.", "display_name": "Map Task Maximum Heap Size", "name": "mapreduce_map_java_opts_max_heap", "value": "825955249" }, { "desc": "For advanced use only, key-value pairs (one on each line) to be inserted into the client configuration for hadoop-env.sh", "display_name": "Gateway Client Environment Advanced Configuration Snippet for hadoop-env.sh (Safety Valve)", "name": "mapreduce_client_env_safety_valve", "value": null }, { "desc": "The number of virtual CPU cores for each reduce task of a job.", "display_name": "Reduce Task CPU Virtual Cores", "name": "mapreduce_reduce_cpu_vcores", "value": "1" }, { "desc": "Environment variables for the MapReduce ApplicationMaster. These settings can be overridden in the Application Master User Environment (yarn.app.mapreduce.am.env).", "display_name": "ApplicationMaster Environment", "name": "yarn_app_mapreduce_am_admin_user_env", "value": "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native:$JAVA_LIBRARY_PATH" }, { "desc": "Java opts for the reduce processes. The following symbol, if present, will be interpolated: @taskid@ is replaced by current TaskID. 
Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in /tmp pass a value of: \"-verbose:gc -Xloggc:/tmp/@taskid@.gc\". The configuration variable 'Reduce Task Memory' can be used to control the maximum memory of the reduce processes.", "display_name": "Reduce Task Java Opts Base", "name": "mapreduce_reduce_java_opts", "value": "-Djava.net.preferIPv4Stack=true" }, { "desc": "When set, Cloudera Manager will send alerts when this entity's configuration changes.", "display_name": "Enable Configuration Change Alerts", "name": "enable_config_alerts", "value": "false" }, { "desc": "If enabled, uses compression on the map outputs before they are sent across the network. Will be part of generated client configuration.", "display_name": "Use Compression on Map Outputs", "name": "mapred_compress_map_output", "value": "true" }, { "desc": "Threshold for number of maps, beyond which a job is considered too big for ubertask optimization.", "display_name": "Ubertask Maximum Maps", "name": "mapreduce_job_ubertask_maxmaps", "value": "9" }, { "desc": "If enabled, multiple instances of some map tasks may be executed in parallel.", "display_name": "Map Tasks Speculative Execution", "name": "mapred_map_tasks_speculative_execution", "value": "false" }, { "desc": "Threshold for number of input bytes, beyond which a job is considered too big for ubertask optimization. If no value is specified, dfs.block.size is used as a default.", "display_name": "Ubertask Maximum Job Size", "name": "mapreduce_job_ubertask_maxbytes", "value": null } ]
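
The three heap-size entries above (yarn_app_mapreduce_am_max_heap, mapreduce_map_java_opts_max_heap, mapreduce_reduce_java_opts_max_heap, each 825955249 bytes) are intended to fit inside the matching container sizes (yarn_app_mapreduce_am_resource_mb, mapreduce_map_memory_mb, mapreduce_reduce_memory_mb, each 1024 MiB), since the heap value is concatenated into the task or ApplicationMaster Java opts while the MiB value is what YARN enforces for the container. Below is a minimal sanity-check sketch over this descriptor, assuming it is saved as mapreduce_gateway.json (a hypothetical filename) and assuming a rule-of-thumb heap-to-container ratio of 0.8; neither the filename nor the ratio comes from this file.

import json

# Rule-of-thumb headroom: keep the task/AM heap at or below ~80% of the
# container's physical memory. The 0.8 ratio is an assumption for this
# sketch, not a value taken from the descriptor.
HEAP_TO_CONTAINER_RATIO = 0.8
MIB = 1024 * 1024

# Pairs of (heap size in bytes, container memory in MiB), using the
# parameter names as they appear in the descriptor.
PAIRS = [
    ("mapreduce_map_java_opts_max_heap", "mapreduce_map_memory_mb"),
    ("mapreduce_reduce_java_opts_max_heap", "mapreduce_reduce_memory_mb"),
    ("yarn_app_mapreduce_am_max_heap", "yarn_app_mapreduce_am_resource_mb"),
]

def check(path="mapreduce_gateway.json"):
    """Load the descriptor and flag heaps that leave too little headroom."""
    with open(path) as fh:
        entries = {e["name"]: e["value"] for e in json.load(fh)}
    for heap_key, mem_key in PAIRS:
        heap_bytes = int(entries[heap_key])
        container_bytes = int(entries[mem_key]) * MIB
        status = "ok" if heap_bytes <= HEAP_TO_CONTAINER_RATIO * container_bytes else "TOO LARGE"
        print(f"{heap_key}: {heap_bytes / MIB:.0f} MiB heap in a "
              f"{entries[mem_key]} MiB container -> {status}")

if __name__ == "__main__":
    check()

With the values above, each heap works out to roughly 788 MiB against a 1024 MiB container, so all three pairs pass the assumed 0.8 check.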