Split out elasticsearch module

The spec: http://specs.openstack.org/openstack-infra/infra-specs/specs/puppet-modules.html Depends-On: Ide11b4f4927fcdb4b19b2e06811f2f32c20d83f3 Change-Id: Id38232f6f9d928c893013c5f7eb70956955bf777
2014-11-25 09:56:30 -06:00 · 2014-11-25 09:56:30 -06:00 · a5197dd45a
commit a5197dd45a
parent 7defc1a55b
5 changed files with 1 additions and 593 deletions
--- a/modules.env
+++ b/modules.env
@ -56,6 +56,7 @@ SOURCE_MODULES["https://github.com/puppet-community/puppet-module-puppetboard"]=
 # Add modules that should be part of the openstack-infra integration test here
 INTEGRATION_MODULES["https://git.openstack.org/openstack-infra/puppet-elasticsearch"]="origin/master"
 INTEGRATION_MODULES["https://git.openstack.org/openstack-infra/puppet-storyboard"]="origin/master"
 INTEGRATION_MODULES["https://git.openstack.org/openstack-infra/puppet-kibana"]="origin/master"
 INTEGRATION_MODULES["https://git.openstack.org/openstack-infra/puppet-jenkins"]="origin/master"
--- a/modules/elasticsearch/files/elasticsearch.mapping.json
+++ b/modules/elasticsearch/files/elasticsearch.mapping.json
@ -1,20 +0,0 @@
 {
  "_default_": {
    "_all": { "enabled": false },
    "_source": { "compress": true },
    "dynamic_templates": [
      {
        "string_template" : {
          "match" : "*",
          "mapping": { "type": "string", "index": "not_analyzed" },
          "match_mapping_type" : "string"
        }
      }
    ],
    "properties" : {
      "@fields": { "type": "object", "dynamic": true, "path": "full" },
      "@message" : { "type" : "string", "index" : "analyzed" },
      "message" : { "type" : "string", "index" : "analyzed" }
    }
  }
 }
--- a/modules/elasticsearch/manifests/init.pp
+++ b/modules/elasticsearch/manifests/init.pp
@ -1,127 +0,0 @@
 # Copyright 2013 Hewlett-Packard Development Company, L.P.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may
 # not use this file except in compliance with the License. You may obtain
 # a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
 #
 # == Class: elasticsearch
 #
 # Installs Elasticsearch from the upstream .deb for a pinned version and
 # lays down its configuration files.
 #
 # The .deb is fetched with wget into /tmp, its SHA1 is compared with the
 # checksum file published next to it, and the package is installed with
 # dpkg only if that comparison did not fail.
 #
 # === Parameters
 #
 # [*version*]
 #   Elasticsearch version; interpolated into the download URLs and the
 #   /tmp file names.
 #
 # [*heap_size*]
 #   Rendered as ES_HEAP_SIZE by the elasticsearch.default.erb template.
 #
 # [*es_template_config*]
 #   Hash of optional elasticsearch.yml settings read by the
 #   elasticsearch.yml.erb template (for example 'node.master' or
 #   'discovery.zen.ping.unicast.hosts').
 #
 class elasticsearch (
  $version = '0.20.5',
  $heap_size = '16g',
  $es_template_config = {}
 ) {
  # install java runtime
  if ! defined(Package['openjdk-7-jre-headless']) {
    package { 'openjdk-7-jre-headless':
      ensure => present,
    }
  }
  # Curl is handy for talking to the ES API on localhost. Allows for
  # querying cluster state and deleting indexes and so on.
  if ! defined(Package['curl']) {
    package { 'curl':
      ensure => present,
    }
  }
  # Download the package once; 'creates' makes later runs a no-op.
  exec { 'get_elasticsearch_deb':
    command => "wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${version}.deb -O /tmp/elasticsearch-${version}.deb",
    path    => '/bin:/usr/bin',
    creates => "/tmp/elasticsearch-${version}.deb",
  }
  # Compute the SHA1 of the downloaded package. Runs in /tmp so the file
  # name recorded by sha1sum carries no directory component.
  exec { 'gen_elasticsearch_deb_sha1':
    command => "sha1sum elasticsearch-${version}.deb > /tmp/elasticsearch-${version}.deb.sha1.gen",
    path    => '/bin:/usr/bin',
    cwd     => '/tmp',
    creates => "/tmp/elasticsearch-${version}.deb.sha1.gen",
    require => [
      Exec['get_elasticsearch_deb'],
    ],
  }
  # Download the checksum file published alongside the package.
  exec { 'get_elasticsearch_deb_sha1':
    command => "wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-${version}.deb.sha1.txt -O /tmp/elasticsearch-${version}.deb.sha1.txt",
    path    => '/bin:/usr/bin',
    creates => "/tmp/elasticsearch-${version}.deb.sha1.txt",
  }
  # Compare the published and the locally generated checksum. Only runs when
  # a fresh package download triggered it (refreshonly + subscribe).
  exec { 'check_elasticsearch_sha1':
    command     => "diff /tmp/elasticsearch-${version}.deb.sha1.txt /tmp/elasticsearch-${version}.deb.sha1.gen",
    path        => '/bin:/usr/bin',
    subscribe   => Exec['get_elasticsearch_deb'],
    refreshonly => true,
    require     => [
      Exec['gen_elasticsearch_deb_sha1'],
      Exec['get_elasticsearch_deb_sha1'],
    ],
  }
  # install elasticsearch
  #
  # The package depends on the checksum check so that a failed comparison
  # causes Puppet to skip the installation instead of installing an
  # unverified .deb.
  # NOTE(review): because the check is refreshonly, a later run (with the
  # .deb already present in /tmp) will not re-verify it; remove the files in
  # /tmp after a failed check to force a fresh download.
  package { 'elasticsearch':
    ensure    => latest,
    source    => "/tmp/elasticsearch-${version}.deb",
    provider  => 'dpkg',
    subscribe => Exec['get_elasticsearch_deb_sha1'],
    require   => [
      Exec['check_elasticsearch_sha1'],
      Package['openjdk-7-jre-headless'],
      File['/etc/elasticsearch/elasticsearch.yml'],
      File['/etc/elasticsearch/default-mapping.json'],
      File['/etc/default/elasticsearch'],
    ],
  }
  file { '/etc/elasticsearch':
    ensure => directory,
    owner  => 'root',
    group  => 'root',
    mode   => '0755',
  }
  file { '/etc/elasticsearch/elasticsearch.yml':
    ensure  => present,
    content => template('elasticsearch/elasticsearch.yml.erb'),
    replace => true,
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    require => File['/etc/elasticsearch'],
  }
  file { '/etc/elasticsearch/templates':
    ensure  => directory,
    owner   => 'root',
    group   => 'root',
    mode    => '0755',
    require => File['/etc/elasticsearch'],
  }
  file { '/etc/elasticsearch/default-mapping.json':
    ensure  => present,
    source  => 'puppet:///modules/elasticsearch/elasticsearch.mapping.json',
    replace => true,
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
    require => File['/etc/elasticsearch'],
  }
  file { '/etc/default/elasticsearch':
    ensure  => present,
    content => template('elasticsearch/elasticsearch.default.erb'),
    replace => true,
    owner   => 'root',
    group   => 'root',
    mode    => '0644',
  }
 }
--- a/modules/elasticsearch/templates/elasticsearch.default.erb
+++ b/modules/elasticsearch/templates/elasticsearch.default.erb
@ -1,38 +0,0 @@
 # Run ElasticSearch as this user ID and group ID
 #ES_USER=elasticsearch
 #ES_GROUP=elasticsearch
 # Heap Size (defaults to 256m min, 1g max)
 # Set from the heap_size parameter of the elasticsearch Puppet class.
 ES_HEAP_SIZE=<%= @heap_size %>
 # Heap new generation
 #ES_HEAP_NEWSIZE=
 # max direct memory
 #ES_DIRECT_SIZE=
 # Maximum number of open files, defaults to 65535.
 #MAX_OPEN_FILES=65535
 # Maximum locked memory size. Set to "unlimited" if you use the
 # bootstrap.mlockall option in elasticsearch.yml. You must also set
 # ES_HEAP_SIZE.
 MAX_LOCKED_MEMORY=unlimited
 # ElasticSearch log directory
 #LOG_DIR=/var/log/elasticsearch
 # ElasticSearch data directory
 #DATA_DIR=/var/lib/elasticsearch
 # ElasticSearch work directory
 #WORK_DIR=/tmp/elasticsearch
 # ElasticSearch configuration directory
 #CONF_DIR=/etc/elasticsearch
 # ElasticSearch configuration file (elasticsearch.yml)
 #CONF_FILE=/etc/elasticsearch/elasticsearch.yml
 # Additional Java OPTS
 #ES_JAVA_OPTS=
--- a/modules/elasticsearch/templates/elasticsearch.yml.erb
+++ b/modules/elasticsearch/templates/elasticsearch.yml.erb
@ -1,408 +0,0 @@
 ##################### ElasticSearch Configuration Example #####################
 # This file contains an overview of various configuration settings,
 # targeted at operations staff. Application developers should
 # consult the guide at <http://elasticsearch.org/guide>.
 #
 # The installation procedure is covered at
 # <http://elasticsearch.org/guide/reference/setup/installation.html>.
 #
 # ElasticSearch comes with reasonable defaults for most settings,
 # so you can try it out without bothering with configuration.
 #
 # Most of the time, these defaults are just fine for running a production
 # cluster. If you're fine-tuning your cluster, or wondering about the
 # effect of a certain configuration option, please _do ask_ on the
 # mailing list or IRC channel [http://elasticsearch.org/community].
 # Any element in the configuration can be replaced with environment variables
 # by placing them in ${...} notation. For example:
 #
 # node.rack: ${RACK_ENV_VAR}
 # See <http://elasticsearch.org/guide/reference/setup/configuration.html>
 # for information on supported formats and syntax for the configuration file.
 ################################### Cluster ###################################
 # Cluster name identifies your cluster for auto-discovery. If you're running
 # multiple clusters on the same network, make sure you're using unique names.
 #
 # cluster.name: elasticsearch
 #################################### Node #####################################
 # Node names are generated dynamically on startup, so you're relieved
 # from configuring them manually. You can tie this node to a specific name:
 #
 node.name: "<%= scope.lookupvar("::hostname") %>"
 # Every node can be configured to allow or deny being eligible as the master,
 # and to allow or deny to store the data.
 #
 # Allow this node to be eligible as a master node (enabled by default):
 #
 # node.master: true
 #
 # Allow this node to store data (enabled by default):
 #
 # node.data: true
 # You can exploit these settings to design advanced cluster topologies.
 #
 # 1. You want this node to never become a master node, only to hold data.
 #    This will be the "workhorse" of your cluster.
 #
 # node.master: false
 # node.data: true
 #
 # 2. You want this node to only serve as a master: to not store any data and
 #    to have free resources. This will be the "coordinator" of your cluster.
 #
 # node.master: true
 # node.data: false
 #
 # 3. You want this node to be neither master nor data node, but
 #    to act as a "search load balancer" (fetching data from nodes,
 #    aggregating results, etc.)
 #
 # node.master: false
 # node.data: false
 # Use the Cluster Health API [http://localhost:9200/_cluster/health], the
 # Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools
 # such as <http://github.com/lukas-vlcek/bigdesk> and
 # <http://mobz.github.com/elasticsearch-head> to inspect the cluster state.
 # A node can have generic attributes associated with it, which can later be used
 # for customized shard allocation filtering, or allocation awareness. An attribute
 # is a simple key value pair, similar to node.key: value, here is an example:
 #
 # node.rack: rack314
 # By default, multiple nodes are allowed to start from the same installation
 # location; to disable this, set the following:
 # node.max_local_storage_nodes: 1
 # Node role overrides from the es_template_config class parameter; each line
 # is emitted only when the corresponding key is present.
 <% if @es_template_config.has_key?('node.master') then -%>
 node.master: <%= @es_template_config['node.master'] %>
 <% end -%>
 <% if @es_template_config.has_key?('node.data') then -%>
 node.data: <%= @es_template_config['node.data'] %>
 <% end -%>
 #################################### Index ####################################
 # You can set a number of options (such as shard/replica options, mapping
 # or analyzer definitions, translog settings, ...) for indices globally,
 # in this file.
 #
 # Note, that it makes more sense to configure index settings specifically for
 # a certain index, either when creating it or by using the index templates API.
 #
 # See <http://elasticsearch.org/guide/reference/index-modules/> and
 # <http://elasticsearch.org/guide/reference/api/admin-indices-create-index.html>
 # for more information.
 # Set the number of shards (splits) of an index (5 by default):
 #
 # index.number_of_shards: 5
 # Set the number of replicas (additional copies) of an index (1 by default):
 #
 # index.number_of_replicas: 1
 # Note, that for development on a local machine, with small indices, it usually
 # makes sense to "disable" the distributed features:
 #
 # index.number_of_shards: 1
 # index.number_of_replicas: 0
 # These settings directly affect the performance of index and search operations
 # in your cluster. Assuming you have enough machines to hold shards and
 # replicas, the rule of thumb is:
 #
 # 1. Having more *shards* enhances the _indexing_ performance and allows you
 #    to _distribute_ a big index across machines.
 # 2. Having more *replicas* enhances the _search_ performance and improves the
 #    cluster _availability_.
 #
 # The "number_of_shards" is a one-time setting for an index.
 #
 # The "number_of_replicas" can be increased or decreased anytime,
 # by using the Index Update Settings API.
 #
 # ElasticSearch takes care of load balancing, relocating, gathering the
 # results from nodes, etc. Experiment with different settings to fine-tune
 # your setup.
 # Use the Index Status API (<http://localhost:9200/A/_status>) to inspect
 # the index status.
 # Index-level overrides from the es_template_config class parameter; each
 # line is emitted only when the corresponding key is present.
 <% if @es_template_config.has_key?('index.store.compress.stored') then -%>
 index.store.compress.stored: <%= @es_template_config['index.store.compress.stored'] %>
 <% end -%>
 <% if @es_template_config.has_key?('index.store.compress.tv') then -%>
 index.store.compress.tv: <%= @es_template_config['index.store.compress.tv'] %>
 <% end -%>
 <% if @es_template_config.has_key?('indices.memory.index_buffer_size') then -%>
 indices.memory.index_buffer_size: "<%= @es_template_config['indices.memory.index_buffer_size'] %>"
 <% end -%>
 #################################### Paths ####################################
 # Path to directory containing configuration (this file and logging.yml):
 #
 # path.conf: /path/to/conf
 # Path to directory where to store index data allocated for this node.
 #
 # path.data: /path/to/data
 #
 # Can optionally include more than one location, causing data to be striped across
 # the locations (a la RAID 0) on a file level, favouring locations with most free
 # space on creation. For example:
 #
 # path.data: /path/to/data1,/path/to/data2
 # Path to temporary files:
 #
 # path.work: /path/to/work
 # Path to log files:
 #
 # path.logs: /path/to/logs
 # Path to where plugins are installed:
 #
 # path.plugins: /path/to/plugins
 #################################### Plugin ###################################
 # If a plugin listed here is not installed for current node, the node will not start.
 #
 # plugin.mandatory: mapper-attachments,lang-groovy
 ################################### Memory ####################################
 # ElasticSearch performs poorly when JVM starts swapping: you should ensure that
 # it _never_ swaps.
 #
 # Set this property to true to lock the memory:
 #
 <%# Emit the configured value when set; otherwise keep the commented-out example. -%>
 <% if @es_template_config.has_key?('bootstrap.mlockall') then -%>
 bootstrap.mlockall: <%= @es_template_config['bootstrap.mlockall'] %>
 <% else -%>
 # bootstrap.mlockall: true
 <% end -%>
 # Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set
 # to the same value, and that the machine has enough memory to allocate
 # for ElasticSearch, leaving enough memory for the operating system itself.
 #
 # You should also make sure that the ElasticSearch process is allowed to lock
 # the memory, eg. by using `ulimit -l unlimited`.
 ############################## Network And HTTP ###############################
 # ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens
 # on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node
 # communication. (the range means that if the port is busy, it will automatically
 # try the next port).
 # Set the bind address specifically (IPv4 or IPv6):
 #
 # network.bind_host: 192.168.0.1
 # Set the address other nodes will use to communicate with this node. If not
 # set, it is automatically derived. It must point to an actual IP address.
 #
 # network.publish_host: 192.168.0.1
 # Set both 'bind_host' and 'publish_host':
 #
 # network.host: 192.168.0.1
 # Set a custom port for the node to node communication (9300 by default):
 #
 # transport.tcp.port: 9300
 # Enable compression for all communication between nodes (disabled by default):
 #
 # transport.tcp.compress: true
 # Set a custom port to listen for HTTP traffic:
 #
 # http.port: 9200
 # Set a custom allowed content length:
 #
 # http.max_content_length: 100mb
 # Disable HTTP completely:
 #
 # http.enabled: false
 ################################### Gateway ###################################
 # The gateway allows for persisting the cluster state between full cluster
 # restarts. Every change to the state (such as adding an index) will be stored
 # in the gateway, and when the cluster starts up for the first time,
 # it will read its state from the gateway.
 # There are several types of gateway implementations. For more information,
 # see <http://elasticsearch.org/guide/reference/modules/gateway>.
 # The default gateway type is the "local" gateway (recommended):
 #
 # gateway.type: local
 # Settings below control how and when to start the initial recovery process on
 # a full cluster restart (to reuse as much local data as possible when using shared
 # gateway).
 # Allow recovery process after N nodes in a cluster are up:
 #
 <%# Emit the configured value when set; otherwise keep the commented-out example. -%>
 <% if @es_template_config.has_key?('gateway.recover_after_nodes') then -%>
 gateway.recover_after_nodes: <%= @es_template_config['gateway.recover_after_nodes'] %>
 <% else -%>
 # gateway.recover_after_nodes: 1
 <% end -%>
 # Set the timeout to initiate the recovery process, once the N nodes
 # from previous setting are up (accepts time value):
 #
 <%# Emit the configured value when set; otherwise keep the commented-out example. -%>
 <% if @es_template_config.has_key?('gateway.recover_after_time') then -%>
 gateway.recover_after_time: <%= @es_template_config['gateway.recover_after_time'] %>
 <% else -%>
 # gateway.recover_after_time: 5m
 <% end -%>
 # Set how many nodes are expected in this cluster. Once these N nodes
 # are up (and recover_after_nodes is met), begin recovery process immediately
 # (without waiting for recover_after_time to expire):
 #
 <%# Emit the configured value when set; otherwise keep the commented-out example. -%>
 <% if @es_template_config.has_key?('gateway.expected_nodes') then -%>
 gateway.expected_nodes: <%= @es_template_config['gateway.expected_nodes'] %>
 <% else -%>
 # gateway.expected_nodes: 2
 <% end -%>
 ############################# Recovery Throttling #############################
 # These settings control the process of shard allocation between
 # nodes during initial recovery, replica allocation, rebalancing,
 # or when adding and removing nodes.
 # Set the number of concurrent recoveries happening on a node:
 #
 # 1. During the initial recovery
 #
 # cluster.routing.allocation.node_initial_primaries_recoveries: 4
 #
 # 2. During adding/removing nodes, rebalancing, etc
 #
 # cluster.routing.allocation.node_concurrent_recoveries: 2
 # Set to throttle throughput when recovering (eg. 100mb, by default unlimited):
 #
 # indices.recovery.max_size_per_sec: 0
 # Set to limit the number of open concurrent streams when
 # recovering a shard from a peer:
 #
 # indices.recovery.concurrent_streams: 5
 ################################## Discovery ##################################
 # Discovery infrastructure ensures nodes can be found within a cluster
 # and master node is elected. Multicast discovery is the default.
 # Set to ensure a node sees N other master eligible nodes to be considered
 # operational within the cluster. Set this option to a higher value (2-4)
 # for large clusters (>3 nodes):
 #
 <%# Emit the configured value when set; otherwise keep the commented-out example. -%>
 <% if @es_template_config.has_key?('discovery.zen.minimum_master_nodes') then -%>
 discovery.zen.minimum_master_nodes: <%= @es_template_config['discovery.zen.minimum_master_nodes'] %>
 <% else -%>
 # discovery.zen.minimum_master_nodes: 1
 <% end -%>
 # Set the time to wait for ping responses from other nodes when discovering.
 # Set this option to a higher value on a slow or congested network
 # to minimize discovery failures:
 #
 # discovery.zen.ping.timeout: 3s
 # See <http://elasticsearch.org/guide/reference/modules/discovery/zen.html>
 # for more information.
 # Unicast discovery lets you explicitly control which nodes will be used
 # to discover the cluster. It can be used when multicast is not present,
 # or to restrict the cluster communication-wise.
 #
 # 1. Disable multicast discovery (enabled by default):
 #
 <%# Emit the configured value when set; otherwise keep the commented-out example. -%>
 <% if @es_template_config.has_key?('discovery.zen.ping.multicast.enabled') then -%>
 discovery.zen.ping.multicast.enabled: <%= @es_template_config['discovery.zen.ping.multicast.enabled'] %>
 <% else -%>
 # discovery.zen.ping.multicast.enabled: false
 <% end -%>
 #
 # 2. Configure an initial list of master nodes in the cluster
 #    to perform discovery when new nodes (master or data) are started:
 #
 # discovery.zen.ping.unicast.hosts: ["host1", "host2:port", "host3[portX-portY]"]
 #
 <%# Accept either a single host string or an array of hosts: wrapping in an
     array and flattening gives a list in both cases before quoting/joining. -%>
 <% if @es_template_config.has_key?('discovery.zen.ping.unicast.hosts') then -%>
 discovery.zen.ping.unicast.hosts: ["<%= [@es_template_config['discovery.zen.ping.unicast.hosts']].flatten.join("\", \"") %>"]
 <% end -%>
 # EC2 discovery uses the AWS EC2 API to perform discovery.
 #
 # You have to install the cloud-aws plugin for enabling the EC2 discovery.
 #
 # See <http://elasticsearch.org/guide/reference/modules/discovery/ec2.html>
 # for more information.
 #
 # See <http://elasticsearch.org/tutorials/2011/08/22/elasticsearch-on-ec2.html>
 # for a step-by-step tutorial.
 ################################## Slow Log ##################################
 # Shard level query and fetch threshold logging.
 #index.search.slowlog.threshold.query.warn: 10s
 #index.search.slowlog.threshold.query.info: 5s
 #index.search.slowlog.threshold.query.debug: 2s
 #index.search.slowlog.threshold.query.trace: 500ms
 #index.search.slowlog.threshold.fetch.warn: 1s
 #index.search.slowlog.threshold.fetch.info: 800ms
 #index.search.slowlog.threshold.fetch.debug: 500ms
 #index.search.slowlog.threshold.fetch.trace: 200ms
 #index.indexing.slowlog.threshold.index.warn: 10s
 #index.indexing.slowlog.threshold.index.info: 5s
 #index.indexing.slowlog.threshold.index.debug: 2s
 #index.indexing.slowlog.threshold.index.trace: 500ms
 ################################## GC Logging ################################
 #monitor.jvm.gc.ParNew.warn: 1000ms
 #monitor.jvm.gc.ParNew.info: 700ms
 #monitor.jvm.gc.ParNew.debug: 400ms
 #monitor.jvm.gc.ConcurrentMarkSweep.warn: 10s
 #monitor.jvm.gc.ConcurrentMarkSweep.info: 5s
 #monitor.jvm.gc.ConcurrentMarkSweep.debug: 2s