Add support for elasticsearch cluster.
We need to expand our elasticsearch install base. Update puppet to make this possible. Change-Id: Id0dae839b12ebf47715cf40a363832e0f661a94f Reviewed-on: https://review.openstack.org/33910 Reviewed-by: Jeremy Stanley <fungi@yuggoth.org> Reviewed-by: James E. Blair <corvus@inaugust.com> Approved: Clark Boylan <clark.boylan@gmail.com> Tested-by: Jenkins
This commit is contained in:
		| @@ -14,7 +14,9 @@ | |||||||
| # | # | ||||||
| # Class to install elasticsearch. | # Class to install elasticsearch. | ||||||
| # | # | ||||||
| class logstash::elasticsearch { | class logstash::elasticsearch ( | ||||||
|  |   discover_node = 'localhost' | ||||||
|  | ) { | ||||||
|   # install java runtime |   # install java runtime | ||||||
|   package { 'java7-runtime-headless': |   package { 'java7-runtime-headless': | ||||||
|     ensure => present, |     ensure => present, | ||||||
| @@ -40,7 +42,7 @@ class logstash::elasticsearch { | |||||||
|  |  | ||||||
|   file { '/etc/elasticsearch/elasticsearch.yml': |   file { '/etc/elasticsearch/elasticsearch.yml': | ||||||
|     ensure  => present, |     ensure  => present, | ||||||
|     source  => 'puppet:///modules/logstash/elasticsearch.yml', |     content => template('logstash/elasticsearch.yml.erb'), | ||||||
|     replace => true, |     replace => true, | ||||||
|     owner   => 'root', |     owner   => 'root', | ||||||
|     group   => 'root', |     group   => 'root', | ||||||
|   | |||||||
							
								
								
									
										370
									
								
								templates/elasticsearch.yml.erb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										370
									
								
								templates/elasticsearch.yml.erb
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,370 @@ | |||||||
|  | ##################### ElasticSearch Configuration Example ##################### | ||||||
|  |  | ||||||
|  | # This file contains an overview of various configuration settings, | ||||||
|  | # targeted at operations staff. Application developers should | ||||||
|  | # consult the guide at <http://elasticsearch.org/guide>. | ||||||
|  | # | ||||||
|  | # The installation procedure is covered at | ||||||
|  | # <http://elasticsearch.org/guide/reference/setup/installation.html>. | ||||||
|  | # | ||||||
|  | # ElasticSearch comes with reasonable defaults for most settings, | ||||||
|  | # so you can try it out without bothering with configuration. | ||||||
|  | # | ||||||
|  | # Most of the time, these defaults are just fine for running a production | ||||||
|  | # cluster. If you're fine-tuning your cluster, or wondering about the | ||||||
|  | # effect of certain configuration option, please _do ask_ on the | ||||||
|  | # mailing list or IRC channel [http://elasticsearch.org/community]. | ||||||
|  |  | ||||||
|  | # Any element in the configuration can be replaced with environment variables | ||||||
|  | # by placing them in ${...} notation. For example: | ||||||
|  | # | ||||||
|  | # node.rack: ${RACK_ENV_VAR} | ||||||
|  |  | ||||||
|  | # See <http://elasticsearch.org/guide/reference/setup/configuration.html> | ||||||
|  | # for information on supported formats and syntax for the configuration file. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ################################### Cluster ################################### | ||||||
|  |  | ||||||
|  | # Cluster name identifies your cluster for auto-discovery. If you're running | ||||||
|  | # multiple clusters on the same network, make sure you're using unique names. | ||||||
|  | # | ||||||
|  | # cluster.name: elasticsearch | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #################################### Node ##################################### | ||||||
|  |  | ||||||
|  | # Node names are generated dynamically on startup, so you're relieved | ||||||
|  | # from configuring them manually. You can tie this node to a specific name: | ||||||
|  | # | ||||||
|  | node.name: "<%= scope.lookupvar("::hostname") %>" | ||||||
|  |  | ||||||
|  | # Every node can be configured to allow or deny being eligible as the master, | ||||||
|  | # and to allow or deny to store the data. | ||||||
|  | # | ||||||
|  | # Allow this node to be eligible as a master node (enabled by default): | ||||||
|  | # | ||||||
|  | # node.master: true | ||||||
|  | # | ||||||
|  | # Allow this node to store data (enabled by default): | ||||||
|  | # | ||||||
|  | # node.data: true | ||||||
|  |  | ||||||
|  | # You can exploit these settings to design advanced cluster topologies. | ||||||
|  | # | ||||||
|  | # 1. You want this node to never become a master node, only to hold data. | ||||||
|  | #    This will be the "workhorse" of your cluster. | ||||||
|  | # | ||||||
|  | # node.master: false | ||||||
|  | # node.data: true | ||||||
|  | # | ||||||
|  | # 2. You want this node to only serve as a master: to not store any data and | ||||||
|  | #    to have free resources. This will be the "coordinator" of your cluster. | ||||||
|  | # | ||||||
|  | # node.master: true | ||||||
|  | # node.data: false | ||||||
|  | # | ||||||
|  | # 3. You want this node to be neither master nor data node, but | ||||||
|  | #    to act as a "search load balancer" (fetching data from nodes, | ||||||
|  | #    aggregating results, etc.) | ||||||
|  | # | ||||||
|  | # node.master: false | ||||||
|  | # node.data: false | ||||||
|  |  | ||||||
|  | # Use the Cluster Health API [http://localhost:9200/_cluster/health], the | ||||||
|  | # Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools | ||||||
|  | # such as <http://github.com/lukas-vlcek/bigdesk> and | ||||||
|  | # <http://mobz.github.com/elasticsearch-head> to inspect the cluster state. | ||||||
|  |  | ||||||
|  | # A node can have generic attributes associated with it, which can later be used | ||||||
|  | # for customized shard allocation filtering, or allocation awareness. An attribute | ||||||
|  | # is a simple key value pair, similar to node.key: value, here is an example: | ||||||
|  | # | ||||||
|  | # node.rack: rack314 | ||||||
|  |  | ||||||
|  | # By default, multiple nodes are allowed to start from the same installation location | ||||||
|  | # to disable it, set the following: | ||||||
|  | # node.max_local_storage_nodes: 1 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #################################### Index #################################### | ||||||
|  |  | ||||||
|  | # You can set a number of options (such as shard/replica options, mapping | ||||||
|  | # or analyzer definitions, translog settings, ...) for indices globally, | ||||||
|  | # in this file. | ||||||
|  | # | ||||||
|  | # Note, that it makes more sense to configure index settings specifically for | ||||||
|  | # a certain index, either when creating it or by using the index templates API. | ||||||
|  | # | ||||||
|  | # See <http://elasticsearch.org/guide/reference/index-modules/> and | ||||||
|  | # <http://elasticsearch.org/guide/reference/api/admin-indices-create-index.html> | ||||||
|  | # for more information. | ||||||
|  |  | ||||||
|  | # Set the number of shards (splits) of an index (5 by default): | ||||||
|  | # | ||||||
|  | # index.number_of_shards: 5 | ||||||
|  |  | ||||||
|  | # Set the number of replicas (additional copies) of an index (1 by default): | ||||||
|  | # | ||||||
|  | # index.number_of_replicas: 1 | ||||||
|  |  | ||||||
|  | # Note, that for development on a local machine, with small indices, it usually | ||||||
|  | # makes sense to "disable" the distributed features: | ||||||
|  | # | ||||||
|  | # index.number_of_shards: 1 | ||||||
|  | # index.number_of_replicas: 0 | ||||||
|  |  | ||||||
|  | # These settings directly affect the performance of index and search operations | ||||||
|  | # in your cluster. Assuming you have enough machines to hold shards and | ||||||
|  | # replicas, the rule of thumb is: | ||||||
|  | # | ||||||
|  | # 1. Having more *shards* enhances the _indexing_ performance and allows to | ||||||
|  | #    _distribute_ a big index across machines. | ||||||
|  | # 2. Having more *replicas* enhances the _search_ performance and improves the | ||||||
|  | #    cluster _availability_. | ||||||
|  | # | ||||||
|  | # The "number_of_shards" is a one-time setting for an index. | ||||||
|  | # | ||||||
|  | # The "number_of_replicas" can be increased or decreased anytime, | ||||||
|  | # by using the Index Update Settings API. | ||||||
|  | # | ||||||
|  | # ElasticSearch takes care about load balancing, relocating, gathering the | ||||||
|  | # results from nodes, etc. Experiment with different settings to fine-tune | ||||||
|  | # your setup. | ||||||
|  |  | ||||||
|  | # Use the Index Status API (<http://localhost:9200/A/_status>) to inspect | ||||||
|  | # the index status. | ||||||
|  |  | ||||||
|  | # Compress stored fields and term vector. | ||||||
|  | index.store.compress.stored: true | ||||||
|  | index.store.compress.tv: true | ||||||
|  |  | ||||||
|  | indices.memory.index_buffer_size: "33%" | ||||||
|  |  | ||||||
|  | #################################### Paths #################################### | ||||||
|  |  | ||||||
|  | # Path to directory containing configuration (this file and logging.yml): | ||||||
|  | # | ||||||
|  | # path.conf: /path/to/conf | ||||||
|  |  | ||||||
|  | # Path to directory where to store index data allocated for this node. | ||||||
|  | # | ||||||
|  | # path.data: /path/to/data | ||||||
|  | # | ||||||
|  | # Can optionally include more than one location, causing data to be striped across | ||||||
|  | # the locations (a la RAID 0) on a file level, favouring locations with most free | ||||||
|  | # space on creation. For example: | ||||||
|  | # | ||||||
|  | # path.data: /path/to/data1,/path/to/data2 | ||||||
|  |  | ||||||
|  | # Path to temporary files: | ||||||
|  | # | ||||||
|  | # path.work: /path/to/work | ||||||
|  |  | ||||||
|  | # Path to log files: | ||||||
|  | # | ||||||
|  | # path.logs: /path/to/logs | ||||||
|  |  | ||||||
|  | # Path to where plugins are installed: | ||||||
|  | # | ||||||
|  | # path.plugins: /path/to/plugins | ||||||
|  |  | ||||||
|  |  | ||||||
|  | #################################### Plugin ################################### | ||||||
|  |  | ||||||
|  | # If a plugin listed here is not installed for current node, the node will not start. | ||||||
|  | # | ||||||
|  | # plugin.mandatory: mapper-attachments,lang-groovy | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ################################### Memory #################################### | ||||||
|  |  | ||||||
|  | # ElasticSearch performs poorly when JVM starts swapping: you should ensure that | ||||||
|  | # it _never_ swaps. | ||||||
|  | # | ||||||
|  | # Set this property to true to lock the memory: | ||||||
|  | # | ||||||
|  | bootstrap.mlockall: true | ||||||
|  |  | ||||||
|  | # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set | ||||||
|  | # to the same value, and that the machine has enough memory to allocate | ||||||
|  | # for ElasticSearch, leaving enough memory for the operating system itself. | ||||||
|  | # | ||||||
|  | # You should also make sure that the ElasticSearch process is allowed to lock | ||||||
|  | # the memory, eg. by using `ulimit -l unlimited`. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ############################## Network And HTTP ############################### | ||||||
|  |  | ||||||
|  | # ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens | ||||||
|  | # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node | ||||||
|  | # communication. (the range means that if the port is busy, it will automatically | ||||||
|  | # try the next port). | ||||||
|  |  | ||||||
|  | # Set the bind address specifically (IPv4 or IPv6): | ||||||
|  | # | ||||||
|  | # network.bind_host: 192.168.0.1 | ||||||
|  |  | ||||||
|  | # Set the address other nodes will use to communicate with this node. If not | ||||||
|  | # set, it is automatically derived. It must point to an actual IP address. | ||||||
|  | # | ||||||
|  | # network.publish_host: 192.168.0.1 | ||||||
|  |  | ||||||
|  | # Set both 'bind_host' and 'publish_host': | ||||||
|  | # | ||||||
|  | # network.host: 192.168.0.1 | ||||||
|  |  | ||||||
|  | # Set a custom port for the node to node communication (9300 by default): | ||||||
|  | # | ||||||
|  | # transport.tcp.port: 9300 | ||||||
|  |  | ||||||
|  | # Enable compression for all communication between nodes (disabled by default): | ||||||
|  | # | ||||||
|  | # transport.tcp.compress: true | ||||||
|  |  | ||||||
|  | # Set a custom port to listen for HTTP traffic: | ||||||
|  | # | ||||||
|  | # http.port: 9200 | ||||||
|  |  | ||||||
|  | # Set a custom allowed content length: | ||||||
|  | # | ||||||
|  | # http.max_content_length: 100mb | ||||||
|  |  | ||||||
|  | # Disable HTTP completely: | ||||||
|  | # | ||||||
|  | # http.enabled: false | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ################################### Gateway ################################### | ||||||
|  |  | ||||||
|  | # The gateway allows for persisting the cluster state between full cluster | ||||||
|  | # restarts. Every change to the state (such as adding an index) will be stored | ||||||
|  | # in the gateway, and when the cluster starts up for the first time, | ||||||
|  | # it will read its state from the gateway. | ||||||
|  |  | ||||||
|  | # There are several types of gateway implementations. For more information, | ||||||
|  | # see <http://elasticsearch.org/guide/reference/modules/gateway>. | ||||||
|  |  | ||||||
|  | # The default gateway type is the "local" gateway (recommended): | ||||||
|  | # | ||||||
|  | # gateway.type: local | ||||||
|  |  | ||||||
|  | # Settings below control how and when to start the initial recovery process on | ||||||
|  | # a full cluster restart (to reuse as much local data as possible when using shared | ||||||
|  | # gateway). | ||||||
|  |  | ||||||
|  | # Allow recovery process after N nodes in a cluster are up: | ||||||
|  | # | ||||||
|  | # gateway.recover_after_nodes: 1 | ||||||
|  |  | ||||||
|  | # Set the timeout to initiate the recovery process, once the N nodes | ||||||
|  | # from previous setting are up (accepts time value): | ||||||
|  | # | ||||||
|  | # gateway.recover_after_time: 5m | ||||||
|  |  | ||||||
|  | # Set how many nodes are expected in this cluster. Once these N nodes | ||||||
|  | # are up (and recover_after_nodes is met), begin recovery process immediately | ||||||
|  | # (without waiting for recover_after_time to expire): | ||||||
|  | # | ||||||
|  | # gateway.expected_nodes: 2 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ############################# Recovery Throttling ############################# | ||||||
|  |  | ||||||
|  | # These settings allow to control the process of shards allocation between | ||||||
|  | # nodes during initial recovery, replica allocation, rebalancing, | ||||||
|  | # or when adding and removing nodes. | ||||||
|  |  | ||||||
|  | # Set the number of concurrent recoveries happening on a node: | ||||||
|  | # | ||||||
|  | # 1. During the initial recovery | ||||||
|  | # | ||||||
|  | # cluster.routing.allocation.node_initial_primaries_recoveries: 4 | ||||||
|  | # | ||||||
|  | # 2. During adding/removing nodes, rebalancing, etc | ||||||
|  | # | ||||||
|  | # cluster.routing.allocation.node_concurrent_recoveries: 2 | ||||||
|  |  | ||||||
|  | # Set to throttle throughput when recovering (eg. 100mb, by default unlimited): | ||||||
|  | # | ||||||
|  | # indices.recovery.max_size_per_sec: 0 | ||||||
|  |  | ||||||
|  | # Set to limit the number of open concurrent streams when | ||||||
|  | # recovering a shard from a peer: | ||||||
|  | # | ||||||
|  | # indices.recovery.concurrent_streams: 5 | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ################################## Discovery ################################## | ||||||
|  |  | ||||||
|  | # Discovery infrastructure ensures nodes can be found within a cluster | ||||||
|  | # and master node is elected. Multicast discovery is the default. | ||||||
|  |  | ||||||
|  | # Set to ensure a node sees N other master eligible nodes to be considered | ||||||
|  | # operational within the cluster. Set this option to a higher value (2-4) | ||||||
|  | # for large clusters (>3 nodes): | ||||||
|  | # | ||||||
|  | # discovery.zen.minimum_master_nodes: 1 | ||||||
|  |  | ||||||
|  | # Set the time to wait for ping responses from other nodes when discovering. | ||||||
|  | # Set this option to a higher value on a slow or congested network | ||||||
|  | # to minimize discovery failures: | ||||||
|  | # | ||||||
|  | # discovery.zen.ping.timeout: 3s | ||||||
|  |  | ||||||
|  | # See <http://elasticsearch.org/guide/reference/modules/discovery/zen.html> | ||||||
|  | # for more information. | ||||||
|  |  | ||||||
|  | # Unicast discovery allows to explicitly control which nodes will be used | ||||||
|  | # to discover the cluster. It can be used when multicast is not present, | ||||||
|  | # or to restrict the cluster communication-wise. | ||||||
|  | # | ||||||
|  | # 1. Disable multicast discovery (enabled by default): | ||||||
|  | # | ||||||
|  | discovery.zen.ping.multicast.enabled: false | ||||||
|  | # | ||||||
|  | # 2. Configure an initial list of master nodes in the cluster | ||||||
|  | #    to perform discovery when new nodes (master or data) are started: | ||||||
|  | # | ||||||
|  | # discovery.zen.ping.unicast.hosts: ["host1", "host2:port", "host3[portX-portY]"] | ||||||
|  | discovery.zen.ping.unicast.hosts: ["<%= discover_node %>"] | ||||||
|  |  | ||||||
|  | # EC2 discovery allows to use AWS EC2 API in order to perform discovery. | ||||||
|  | # | ||||||
|  | # You have to install the cloud-aws plugin for enabling the EC2 discovery. | ||||||
|  | # | ||||||
|  | # See <http://elasticsearch.org/guide/reference/modules/discovery/ec2.html> | ||||||
|  | # for more information. | ||||||
|  | # | ||||||
|  | # See <http://elasticsearch.org/tutorials/2011/08/22/elasticsearch-on-ec2.html> | ||||||
|  | # for a step-by-step tutorial. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ################################## Slow Log ################################## | ||||||
|  |  | ||||||
|  | # Shard level query and fetch threshold logging. | ||||||
|  |  | ||||||
|  | #index.search.slowlog.threshold.query.warn: 10s | ||||||
|  | #index.search.slowlog.threshold.query.info: 5s | ||||||
|  | #index.search.slowlog.threshold.query.debug: 2s | ||||||
|  | #index.search.slowlog.threshold.query.trace: 500ms | ||||||
|  |  | ||||||
|  | #index.search.slowlog.threshold.fetch.warn: 1s | ||||||
|  | #index.search.slowlog.threshold.fetch.info: 800ms | ||||||
|  | #index.search.slowlog.threshold.fetch.debug: 500ms | ||||||
|  | #index.search.slowlog.threshold.fetch.trace: 200ms | ||||||
|  |  | ||||||
|  | #index.indexing.slowlog.threshold.index.warn: 10s | ||||||
|  | #index.indexing.slowlog.threshold.index.info: 5s | ||||||
|  | #index.indexing.slowlog.threshold.index.debug: 2s | ||||||
|  | #index.indexing.slowlog.threshold.index.trace: 500ms | ||||||
|  |  | ||||||
|  | ################################## GC Logging ################################ | ||||||
|  |  | ||||||
|  | #monitor.jvm.gc.ParNew.warn: 1000ms | ||||||
|  | #monitor.jvm.gc.ParNew.info: 700ms | ||||||
|  | #monitor.jvm.gc.ParNew.debug: 400ms | ||||||
|  |  | ||||||
|  | #monitor.jvm.gc.ConcurrentMarkSweep.warn: 10s | ||||||
|  | #monitor.jvm.gc.ConcurrentMarkSweep.info: 5s | ||||||
|  | #monitor.jvm.gc.ConcurrentMarkSweep.debug: 2s | ||||||
		Reference in New Issue
	
	Block a user
	 Clark Boylan
					Clark Boylan