From ffe95a7e43a00f1e596fd5e33b6ef69ae778263b Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Thu, 20 Jun 2013 18:37:41 -0700 Subject: [PATCH] Add support for elasticsearch cluster. We need to expand our elasticsearch install base. Update puppet to make this possible. Change-Id: Id0dae839b12ebf47715cf40a363832e0f661a94f Reviewed-on: https://review.openstack.org/33910 Reviewed-by: Jeremy Stanley Reviewed-by: James E. Blair Approved: Clark Boylan Tested-by: Jenkins --- manifests/init.pp | 6 +- templates/elasticsearch.yml.erb | 370 ++++++++++++++++++++++++++++++++ 2 files changed, 374 insertions(+), 2 deletions(-) create mode 100644 templates/elasticsearch.yml.erb diff --git a/manifests/init.pp b/manifests/init.pp index c16d86e..68018b9 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -14,7 +14,9 @@ # # Class to install elasticsearch. # -class logstash::elasticsearch { +class logstash::elasticsearch ( + discover_node = 'localhost' +) { # install java runtime package { 'java7-runtime-headless': ensure => present, @@ -40,7 +42,7 @@ class logstash::elasticsearch { file { '/etc/elasticsearch/elasticsearch.yml': ensure => present, - source => 'puppet:///modules/logstash/elasticsearch.yml', + content => template('logstash/elasticsearch.yml.erb'), replace => true, owner => 'root', group => 'root', diff --git a/templates/elasticsearch.yml.erb b/templates/elasticsearch.yml.erb new file mode 100644 index 0000000..1803562 --- /dev/null +++ b/templates/elasticsearch.yml.erb @@ -0,0 +1,370 @@ +##################### ElasticSearch Configuration Example ##################### + +# This file contains an overview of various configuration settings, +# targeted at operations staff. Application developers should +# consult the guide at . +# +# The installation procedure is covered at +# . +# +# ElasticSearch comes with reasonable defaults for most settings, +# so you can try it out without bothering with configuration. +# +# Most of the time, these defaults are just fine for running a production +# cluster. If you're fine-tuning your cluster, or wondering about the +# effect of certain configuration option, please _do ask_ on the +# mailing list or IRC channel [http://elasticsearch.org/community]. + +# Any element in the configuration can be replaced with environment variables +# by placing them in ${...} notation. For example: +# +# node.rack: ${RACK_ENV_VAR} + +# See +# for information on supported formats and syntax for the configuration file. + + +################################### Cluster ################################### + +# Cluster name identifies your cluster for auto-discovery. If you're running +# multiple clusters on the same network, make sure you're using unique names. +# +# cluster.name: elasticsearch + + +#################################### Node ##################################### + +# Node names are generated dynamically on startup, so you're relieved +# from configuring them manually. You can tie this node to a specific name: +# +node.name: "<%= scope.lookupvar("::hostname") %>" + +# Every node can be configured to allow or deny being eligible as the master, +# and to allow or deny to store the data. +# +# Allow this node to be eligible as a master node (enabled by default): +# +# node.master: true +# +# Allow this node to store data (enabled by default): +# +# node.data: true + +# You can exploit these settings to design advanced cluster topologies. +# +# 1. You want this node to never become a master node, only to hold data. +# This will be the "workhorse" of your cluster. +# +# node.master: false +# node.data: true +# +# 2. You want this node to only serve as a master: to not store any data and +# to have free resources. This will be the "coordinator" of your cluster. +# +# node.master: true +# node.data: false +# +# 3. You want this node to be neither master nor data node, but +# to act as a "search load balancer" (fetching data from nodes, +# aggregating results, etc.) +# +# node.master: false +# node.data: false + +# Use the Cluster Health API [http://localhost:9200/_cluster/health], the +# Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools +# such as and +# to inspect the cluster state. + +# A node can have generic attributes associated with it, which can later be used +# for customized shard allocation filtering, or allocation awareness. An attribute +# is a simple key value pair, similar to node.key: value, here is an example: +# +# node.rack: rack314 + +# By default, multiple nodes are allowed to start from the same installation location +# to disable it, set the following: +# node.max_local_storage_nodes: 1 + + +#################################### Index #################################### + +# You can set a number of options (such as shard/replica options, mapping +# or analyzer definitions, translog settings, ...) for indices globally, +# in this file. +# +# Note, that it makes more sense to configure index settings specifically for +# a certain index, either when creating it or by using the index templates API. +# +# See and +# +# for more information. + +# Set the number of shards (splits) of an index (5 by default): +# +# index.number_of_shards: 5 + +# Set the number of replicas (additional copies) of an index (1 by default): +# +# index.number_of_replicas: 1 + +# Note, that for development on a local machine, with small indices, it usually +# makes sense to "disable" the distributed features: +# +# index.number_of_shards: 1 +# index.number_of_replicas: 0 + +# These settings directly affect the performance of index and search operations +# in your cluster. Assuming you have enough machines to hold shards and +# replicas, the rule of thumb is: +# +# 1. Having more *shards* enhances the _indexing_ performance and allows to +# _distribute_ a big index across machines. +# 2. Having more *replicas* enhances the _search_ performance and improves the +# cluster _availability_. +# +# The "number_of_shards" is a one-time setting for an index. +# +# The "number_of_replicas" can be increased or decreased anytime, +# by using the Index Update Settings API. +# +# ElasticSearch takes care about load balancing, relocating, gathering the +# results from nodes, etc. Experiment with different settings to fine-tune +# your setup. + +# Use the Index Status API () to inspect +# the index status. + +# Compress stored fields and term vector. +index.store.compress.stored: true +index.store.compress.tv: true + +indices.memory.index_buffer_size: "33%" + +#################################### Paths #################################### + +# Path to directory containing configuration (this file and logging.yml): +# +# path.conf: /path/to/conf + +# Path to directory where to store index data allocated for this node. +# +# path.data: /path/to/data +# +# Can optionally include more than one location, causing data to be striped across +# the locations (a la RAID 0) on a file level, favouring locations with most free +# space on creation. For example: +# +# path.data: /path/to/data1,/path/to/data2 + +# Path to temporary files: +# +# path.work: /path/to/work + +# Path to log files: +# +# path.logs: /path/to/logs + +# Path to where plugins are installed: +# +# path.plugins: /path/to/plugins + + +#################################### Plugin ################################### + +# If a plugin listed here is not installed for current node, the node will not start. +# +# plugin.mandatory: mapper-attachments,lang-groovy + + +################################### Memory #################################### + +# ElasticSearch performs poorly when JVM starts swapping: you should ensure that +# it _never_ swaps. +# +# Set this property to true to lock the memory: +# +bootstrap.mlockall: true + +# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set +# to the same value, and that the machine has enough memory to allocate +# for ElasticSearch, leaving enough memory for the operating system itself. +# +# You should also make sure that the ElasticSearch process is allowed to lock +# the memory, eg. by using `ulimit -l unlimited`. + + +############################## Network And HTTP ############################### + +# ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens +# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node +# communication. (the range means that if the port is busy, it will automatically +# try the next port). + +# Set the bind address specifically (IPv4 or IPv6): +# +# network.bind_host: 192.168.0.1 + +# Set the address other nodes will use to communicate with this node. If not +# set, it is automatically derived. It must point to an actual IP address. +# +# network.publish_host: 192.168.0.1 + +# Set both 'bind_host' and 'publish_host': +# +# network.host: 192.168.0.1 + +# Set a custom port for the node to node communication (9300 by default): +# +# transport.tcp.port: 9300 + +# Enable compression for all communication between nodes (disabled by default): +# +# transport.tcp.compress: true + +# Set a custom port to listen for HTTP traffic: +# +# http.port: 9200 + +# Set a custom allowed content length: +# +# http.max_content_length: 100mb + +# Disable HTTP completely: +# +# http.enabled: false + + +################################### Gateway ################################### + +# The gateway allows for persisting the cluster state between full cluster +# restarts. Every change to the state (such as adding an index) will be stored +# in the gateway, and when the cluster starts up for the first time, +# it will read its state from the gateway. + +# There are several types of gateway implementations. For more information, +# see . + +# The default gateway type is the "local" gateway (recommended): +# +# gateway.type: local + +# Settings below control how and when to start the initial recovery process on +# a full cluster restart (to reuse as much local data as possible when using shared +# gateway). + +# Allow recovery process after N nodes in a cluster are up: +# +# gateway.recover_after_nodes: 1 + +# Set the timeout to initiate the recovery process, once the N nodes +# from previous setting are up (accepts time value): +# +# gateway.recover_after_time: 5m + +# Set how many nodes are expected in this cluster. Once these N nodes +# are up (and recover_after_nodes is met), begin recovery process immediately +# (without waiting for recover_after_time to expire): +# +# gateway.expected_nodes: 2 + + +############################# Recovery Throttling ############################# + +# These settings allow to control the process of shards allocation between +# nodes during initial recovery, replica allocation, rebalancing, +# or when adding and removing nodes. + +# Set the number of concurrent recoveries happening on a node: +# +# 1. During the initial recovery +# +# cluster.routing.allocation.node_initial_primaries_recoveries: 4 +# +# 2. During adding/removing nodes, rebalancing, etc +# +# cluster.routing.allocation.node_concurrent_recoveries: 2 + +# Set to throttle throughput when recovering (eg. 100mb, by default unlimited): +# +# indices.recovery.max_size_per_sec: 0 + +# Set to limit the number of open concurrent streams when +# recovering a shard from a peer: +# +# indices.recovery.concurrent_streams: 5 + + +################################## Discovery ################################## + +# Discovery infrastructure ensures nodes can be found within a cluster +# and master node is elected. Multicast discovery is the default. + +# Set to ensure a node sees N other master eligible nodes to be considered +# operational within the cluster. Set this option to a higher value (2-4) +# for large clusters (>3 nodes): +# +# discovery.zen.minimum_master_nodes: 1 + +# Set the time to wait for ping responses from other nodes when discovering. +# Set this option to a higher value on a slow or congested network +# to minimize discovery failures: +# +# discovery.zen.ping.timeout: 3s + +# See +# for more information. + +# Unicast discovery allows to explicitly control which nodes will be used +# to discover the cluster. It can be used when multicast is not present, +# or to restrict the cluster communication-wise. +# +# 1. Disable multicast discovery (enabled by default): +# +discovery.zen.ping.multicast.enabled: false +# +# 2. Configure an initial list of master nodes in the cluster +# to perform discovery when new nodes (master or data) are started: +# +# discovery.zen.ping.unicast.hosts: ["host1", "host2:port", "host3[portX-portY]"] +discovery.zen.ping.unicast.hosts: ["<%= discover_node %>"] + +# EC2 discovery allows to use AWS EC2 API in order to perform discovery. +# +# You have to install the cloud-aws plugin for enabling the EC2 discovery. +# +# See +# for more information. +# +# See +# for a step-by-step tutorial. + + +################################## Slow Log ################################## + +# Shard level query and fetch threshold logging. + +#index.search.slowlog.threshold.query.warn: 10s +#index.search.slowlog.threshold.query.info: 5s +#index.search.slowlog.threshold.query.debug: 2s +#index.search.slowlog.threshold.query.trace: 500ms + +#index.search.slowlog.threshold.fetch.warn: 1s +#index.search.slowlog.threshold.fetch.info: 800ms +#index.search.slowlog.threshold.fetch.debug: 500ms +#index.search.slowlog.threshold.fetch.trace: 200ms + +#index.indexing.slowlog.threshold.index.warn: 10s +#index.indexing.slowlog.threshold.index.info: 5s +#index.indexing.slowlog.threshold.index.debug: 2s +#index.indexing.slowlog.threshold.index.trace: 500ms + +################################## GC Logging ################################ + +#monitor.jvm.gc.ParNew.warn: 1000ms +#monitor.jvm.gc.ParNew.info: 700ms +#monitor.jvm.gc.ParNew.debug: 400ms + +#monitor.jvm.gc.ConcurrentMarkSweep.warn: 10s +#monitor.jvm.gc.ConcurrentMarkSweep.info: 5s +#monitor.jvm.gc.ConcurrentMarkSweep.debug: 2s