Further tune the playbooks, configs, and thread pool

* Optionally implements G1 GC. The variable `elastic_g1gc_enabled` has
  been added with a default of false. When this option is set to true
  and the node's computed JVM heap is larger than 4GiB, G1 GC will be
  enabled (see the example below).
* Adds new thread pool options: explicit queue sizes for the search,
  get, and write pools, derived from the detected processor count.
* Better constrains coordination (Kibana) nodes: reduced heap size, a
  capped thread pool, and an explicit `processors` limit.
* Interface recovery speed is now limited to 20% of the detected
  interface speed, with a cap of 750mb.
* The indexing buffer size is now set correctly (20% of heap).
* Serializes the ELK deployment (33% for Elasticsearch, 50% for
  Logstash) so that upgrades are non-impacting.
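
To opt in, a deployer sets the variable in user-supplied overrides. A
minimal sketch, assuming the usual openstack-ansible override file (the
path is illustrative, not part of this change):

    # /etc/openstack_deploy/user_variables.yml
    # Opt in to G1 GC. The jvm.options template only activates it when
    # the node's computed heap exceeds 4096MiB; smaller heaps keep the
    # existing CMS collector settings.
    elastic_g1gc_enabled: true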

Change-Id: I89224eeaf4ed29c3bb1d7f8010b69503dbc74e11
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
Kevin Carter 2018-07-24 17:46:15 -05:00 committed by Kevin Carter (cloudnull)
parent 39e9905d00
commit f69d391325
17 changed files with 139 additions and 71 deletions

View File

@@ -437,6 +437,7 @@ configuration file using the key/value pairs as options.
      client_id: "elk_metrics_6x"
      compression_type: "gzip"
      security_protocol: "SSL"
+     id: "UniqueOutputID"
 
 For a complete list of all options available within the Logstash Kafka output

View File

@@ -41,18 +41,16 @@
   set_fact:
     data_nodes: "{{ (groups['elastic-logstash'][:master_node_count | int] + groups['elastic-logstash'][master_node_count | int::2]) }}"
     master_nodes: "{{ groups['elastic-logstash'][:master_node_count | int] }}"
-    coordination_nodes: |-
-      {% set nodes=[] %}
-      {% for host in groups['kibana'] %}
-      {% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (elastic_port | string))) %}
-      {% endfor %}
-      {{ nodes }}
-    zen_nodes: |-
-      {% set nodes=[] %}
-      {% for host in (groups['elastic-logstash'] | union(groups['kibana'])) %}
-      {% set _ = nodes.insert(loop.index, (hostvars[host]['ansible_host'] | string)) %}
-      {% endfor %}
-      {{ nodes }}
+    coordination_nodes: >-
+      {{
+        (groups['kibana'] | map('extract', hostvars, 'ansible_host') | list)
+          | map('regex_replace', '(.*)' ,'\1:' ~ elastic_port)
+          | list
+      }}
+    zen_nodes: >-
+      {{
+        (groups['elastic-logstash'] | union(groups['kibana'])) | map('extract', hostvars, 'ansible_host') | list
+      }}
     elasticserch_interface_speed: |-
       {% set default_interface_fact = hostvars[inventory_hostname]['ansible_' + (elastic_data_interface | replace('-', '_'))] %}
       {% set speeds = [] %}
@@ -85,16 +83,28 @@
       {% set _ = speeds.append(1000) %}
       {% endif %}
       {% endif %}
-      {{ ((speeds | min) * 0.75) | int }}
+      {% set interface_speed = ((speeds | min) * 0.20) | int %}
+      {{ ((interface_speed | int) > 750) | ternary(750, interface_speed) }}
   tags:
     - always
 
+- name: Set data node details
+  set_fact:
+    elasticsearch_data_node_details: >-
+      {{
+        (data_nodes | map('extract', hostvars, 'ansible_host') | list) | map('regex_replace', '(.*)' ,'\1:' ~ elastic_port) | list
+      }}
+    logstash_data_node_details: >-
+      {{
+        (data_nodes | map('extract', hostvars, 'ansible_host') | list) | map('regex_replace', '(.*)' ,'\1:' ~ logstash_beat_input_port) | list
+      }}
+
+# based on the assignment of roles to hosts, set per host booleans
 - name: Node enablement
   set_fact:
     master_node: "{{ (inventory_hostname in master_nodes) | ternary(true, false) }}"
     data_node: "{{ (inventory_hostname in data_nodes) | ternary(true, false) }}"
-    elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 24) | ternary(24, ansible_processor_cores) }}"
+    elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) >= 24) | ternary(24, ansible_processor_cores) }}"
   tags:
     - always
@@ -103,26 +113,18 @@
 - name: Set data nodes
   set_fact:
     elasticsearch_data_hosts: |-
+      {% set nodes = elasticsearch_data_node_details %}
       {% if inventory_hostname in data_nodes %}
-      {% set data_hosts = ['127.0.0.1:' + (elastic_port | string)] %}
-      {% else %}
-      {% set nodes=[] %}
-      {% for host in data_nodes %}
-      {% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (elastic_port | string))) %}
-      {% endfor %}
-      {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
+      {% set _ = nodes.insert(0, '127.0.0.1:' ~ elastic_port) %}
       {% endif %}
+      {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
       {{ data_hosts }}
     logstash_data_hosts: |-
+      {% set nodes = logstash_data_node_details %}
       {% if inventory_hostname in data_nodes %}
-      {% set data_hosts = ['127.0.0.1:' + (logstash_beat_input_port | string)] %}
-      {% else %}
-      {% set nodes=[] %}
-      {% for host in data_nodes %}
-      {% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (logstash_beat_input_port | string))) %}
-      {% endfor %}
-      {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
+      {% set _ = nodes.insert(0, '127.0.0.1:' ~ logstash_beat_input_port) %}
       {% endif %}
+      {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
       {{ data_hosts }}
   tags:
     - always

View File

@@ -33,7 +33,8 @@
         number_of_replicas: "1"
     - name: "_all/_settings?preserve_existing=true"
       index_options:
-        index.refresh_interval: "1m"
+        index.refresh_interval: "10s"
     - name: "_all/_settings?preserve_existing=true"
      index_options:
-        index.queries.cache.enabled: "false"
+        index.queries.cache.enabled: "true"
+        indices.queries.cache.size: "5%"

View File

@@ -1,6 +1,41 @@
 ---
-- name: Install Elastic Search
+- name: Run serialization detection
   hosts: "elastic-logstash:kibana"
+  gather_facts: true
+  vars_files:
+    - vars/variables.yml
+  tasks:
+    - include_tasks: common_task_data_node_hosts.yml
+
+    - name: Group by stand alone masters
+      group_by:
+        key: elastic_masters
+        parents: elastic-logstash
+      when:
+        - inventory_hostname in master_nodes
+
+    - name: Group by non stand alone masters
+      group_by:
+        key: elastic_non_masters
+        parents: elastic-logstash
+      when:
+        - inventory_hostname in (data_nodes | difference(master_nodes))
+
+    - name: Group by coordinators
+      group_by:
+        key: elastic_coordinators
+        parents: elastic-logstash
+      when:
+        - inventory_hostname in groups['kibana']
+  tags:
+    - always
+
+- name: Install Elastic Search
+  hosts: "elastic_coordinators:elastic_masters:elastic_non_masters"
+  serial: "33%"
   become: true
   vars_files:
@@ -12,11 +47,6 @@
   environment: "{{ deployment_environment_variables | default({}) }}"
-  pre_tasks:
-    - include_tasks: common_task_data_node_hosts.yml
-      tags:
-        - always
   tasks:
     - name: Set memory fact to half
       set_fact:
@@ -40,10 +70,12 @@
         elasticsearch_node_master: false
         elasticsearch_node_data: false
         elasticsearch_node_ingest: false
+        elastic_coordination_node: true
         elastic_heap_size: "{{ (elastic_heap_size | int) // 3 }}"
-        elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 4) | ternary(4, 1) }}"
+        elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 4) | ternary(4, (ansible_processor_cores // 2)) }}"
       when:
-        - inventory_hostname in (groups['kibana'] | difference(groups['elastic-logstash']))
+        - inventory_hostname in (groups['kibana'] | default([])) and
+          not inventory_hostname in (groups['elastic-logstash'] | default([]))
       tags:
         - always

View File

@@ -1,6 +1,7 @@
 ---
 - name: Install Logstash
   hosts: elastic-logstash
+  serial: "50%"
   become: true
   vars_files:
     - vars/variables.yml
@@ -18,7 +19,7 @@
   tasks:
     - name: Set quarter memory fact
       set_fact:
-        q_mem: "{{ (ansible_memtotal_mb | int) // 4 }}"
+        q_mem: "{{ (ansible_memtotal_mb | int) // 3 }}"
       when:
         - q_mem is not defined
       tags:
@@ -26,7 +27,7 @@
     - name: Set processor cores fact
       set_fact:
-        q_storage: "{{ ansible_processor_cores }}"
+        q_storage: "{{ (ansible_processor_cores | int) * 2 }}"
       when:
         - q_storage is not defined
       tags:
@@ -124,7 +125,7 @@
       notify:
         - Enable and restart logstash
 
-    - name: Drop elasticsearch conf file
+    - name: Drop logstash conf file(s)
       template:
         src: "{{ item.src }}"
         dest: "{{ item.dest }}"

View File

@@ -2,7 +2,7 @@
 - name: Load apm-server Dashboards
   hosts: apm-server[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -2,7 +2,7 @@
 - name: Load Auditbeat Dashboards
   hosts: hosts[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -2,7 +2,7 @@
 - name: Load Filebeat Dashboards
   hosts: hosts[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -2,7 +2,7 @@
 - name: Load Heartbeat Dashboards
   hosts: kibana[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -15,7 +15,7 @@
 - name: Load Journalbeat Dashboards
   hosts: hosts[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -2,7 +2,7 @@
 - name: Load Metricsbeat Dashboards
   hosts: all[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -2,7 +2,7 @@
 - name: Load Packetbeat Dashboards
   hosts: hosts[0]
-  gather_facts: false
+  gather_facts: true
   vars_files:
     - vars/variables.yml

View File

@@ -1,6 +1,16 @@
+filter {
+  fingerprint {
+    source => "message"
+    target => "[@metadata][fingerprint]"
+    method => "SHA1"
+    key => "{{ cluster_name | replace(' ', '_') }}"
+    base64encode => true
+  }
+}
 output {
   if [@metadata][version] {
     elasticsearch {
+      document_id => "%{[@metadata][fingerprint]}"
       hosts => {{ elasticsearch_data_hosts | shuffle(seed=inventory_hostname) | to_json }}
       sniffing => {{ (not data_node | bool) | lower }}
       manage_template => {{ (data_node | bool) | lower }}
@@ -8,6 +18,7 @@ output {
     }
   } else {
     elasticsearch {
+      document_id => "%{[@metadata][fingerprint]}"
       hosts => {{ elasticsearch_data_hosts | shuffle(seed=inventory_hostname) | to_json }}
       sniffing => {{ (not data_node | bool) | lower }}
       manage_template => {{ (data_node | bool) | lower }}

View File

@@ -101,26 +101,35 @@ gateway.recover_after_nodes: {{ ((master_node_count | int) // 2) + 1 }}
 #
 # action.destructive_requires_name: true
+{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %}
+{% if not (elastic_coordination_node | default(false)) | bool %}
 # Thread pool settings. For more on this see the documentation at:
 # <https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-threadpool.html>
 thread_pool:
   search:
-    size: {{ (elastic_thread_pool_size | int) }}
-    queue_size: {{ (elastic_thread_pool_size | int) * 256 }}
-  index:
-    size: {{ (elastic_thread_pool_size | int) }}
-    queue_size: {{ (elastic_thread_pool_size | int) * 256 }}
-  bulk:
-    size: {{ (elastic_thread_pool_size | int) }}
-    queue_size: {{ (elastic_thread_pool_size | int) * 512 }}
+    queue_size: {{ (processors | int) * 256 }}
+  get:
+    queue_size: {{ (processors | int) * 256 }}
+  write:
+    queue_size: {{ (processors | int) * 512 }}
+{% else %}
+# The number of processors is automatically detected, and the thread pool
+# settings are automatically set based on it. In some cases it can be useful to
+# override the number of detected processors. This can be done by explicitly
+# setting the processors setting. On Kibana hosts where elasticsearch is running
+# as a coordination node, the processor count is limited.
+processors: {{ processors }}
+{% endif %}
-# Accepts either a percentage or a byte size value. Set to 30%, meaning that 30%
+# Accepts either a percentage or a byte size value. Set to 20%, meaning that 20%
 # of the total heap allocated to a node will be used as the indexing buffer size
 # shared across all shards.
-indices.memory.index_buffer_size: 30%
+indices.memory.index_buffer_size: 20%
-# Connection throttling on recovery is limited to 75% of the detected interface
-# speed. This will improce search speeds and reduce general cluster pressure.
+# Connection throttling on recovery is limited to 20% of the detected interface
+# speed with a cap of 750mb. This will improve search speeds and reduce general
+# cluster pressure.
 indices.recovery.max_bytes_per_sec: {{ elasticserch_interface_speed }}mb
 # ---------------------------------- X-Pack ------------------------------------

View File

@@ -1,14 +1,13 @@
 ## JVM configuration
-# Xms represents the initial size of total heap space
-# Xmx represents the maximum size of total heap space
 {% if (not (elasticsearch_node_master | default(master_node)) | bool) and (not (elasticsearch_node_data | default(data_node)) | bool) %}
--Xms{{ (elastic_heap_size | int) // 2 }}m
--Xmx{{ (elastic_heap_size | int) // 2 }}m
+{% set heap_size = (elastic_heap_size | int) // 2 %}
 {% else %}
--Xms{{ elastic_heap_size }}m
--Xmx{{ elastic_heap_size }}m
+{% set heap_size = (elastic_heap_size | int) %}
 {% endif %}
+# Xms represents the initial size of total heap space
+-Xms{{ heap_size }}m
+# Xmx represents the maximum size of total heap space
+-Xmx{{ heap_size }}m
 ################################################################
@@ -21,11 +20,17 @@
 ##
 ################################################################
-## GC configuration
+## GC Configuration
+{% if ((heap_size | int) > 4096) and (elastic_g1gc_enabled | bool) %}
+-XX:+UseG1GC
+-XX:MaxGCPauseMillis=400
+-XX:InitiatingHeapOccupancyPercent=75
+{% else %}
 -XX:+UseParNewGC
 -XX:+UseConcMarkSweepGC
 -XX:CMSInitiatingOccupancyFraction=75
 -XX:+UseCMSInitiatingOccupancyOnly
+{% endif %}
 ## optimizations

View File

@@ -38,16 +38,17 @@ path.data: /var/lib/logstash
 #
 # This defaults to the number of the host's CPU cores.
 #
-# pipeline.workers: 2
+{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %}
+pipeline.workers: {{ processors | int }}
 #
 # How many events to retrieve from inputs before sending to filters+workers
 #
-# pipeline.batch.size: 125
+pipeline.batch.size: 256
 #
 # How long to wait in milliseconds while polling for the next event
 # before dispatching an undersized batch to filters+outputs
 #
-# pipeline.batch.delay: 50
+pipeline.batch.delay: 20
 #
 # Force Logstash to exit during shutdown even if there are still inflight
 # events in memory. By default, logstash will refuse to quit until all

View File

@@ -68,6 +68,11 @@ elastic_vip_url: >-
 #   path: "/elastic-backup"
 #   state: mounted
 
+# EXPERIMENTAL - When the heap size for a given elastic node is greater than
+#                4GiB the G1 garbage collector can be enabled. This is an
+#                experimental feature and may be removed later.
+elastic_g1gc_enabled: false
+
 # kibana vars
 kibana_interface: 0.0.0.0
 kibana_port: 5601