From f69d3913253c3ac67c505a3d92d8135e9cf68d42 Mon Sep 17 00:00:00 2001 From: Kevin Carter Date: Tue, 24 Jul 2018 17:46:15 -0500 Subject: [PATCH] Further tune the playbooks, configs, and thread pool * Implements G1 GC optionally. The variable `elastic_g1gc_enabled` has been added with a default of false. If this option is set true and the system has more than 4GiB of RAM G1GC will be enabled. * Adds new thread options * Better constraints coordination nodes * Interface recover speed has been limited * Buffer size is now set correctly * Serialize elk deployment so that upgrades are non-impacting Change-Id: I89224eeaf4ed29c3bb1d7f8010b69503dbc74e11 Signed-off-by: Kevin Carter --- elk_metrics_6x/README.rst | 1 + .../common_task_data_node_hosts.yml | 58 ++++++++++--------- elk_metrics_6x/createElasticIndexes.yml | 5 +- elk_metrics_6x/installElastic.yml | 48 ++++++++++++--- elk_metrics_6x/installLogstash.yml | 7 ++- elk_metrics_6x/setupAPMserver.yml | 2 +- elk_metrics_6x/setupAuditbeat.yml | 2 +- elk_metrics_6x/setupFilebeat.yml | 2 +- elk_metrics_6x/setupHeartbeat.yml | 2 +- elk_metrics_6x/setupJournalbeat.yml | 2 +- elk_metrics_6x/setupMetricbeat.yml | 2 +- elk_metrics_6x/setupPacketbeat.yml | 2 +- .../templates/99-elasticsearch-output.conf.j2 | 11 ++++ elk_metrics_6x/templates/elasticsearch.yml.j2 | 33 +++++++---- elk_metrics_6x/templates/jvm.options.j2 | 21 ++++--- elk_metrics_6x/templates/logstash.yml.j2 | 7 ++- elk_metrics_6x/vars/variables.yml | 5 ++ 17 files changed, 139 insertions(+), 71 deletions(-) diff --git a/elk_metrics_6x/README.rst b/elk_metrics_6x/README.rst index 2d30d837..e408b14a 100644 --- a/elk_metrics_6x/README.rst +++ b/elk_metrics_6x/README.rst @@ -437,6 +437,7 @@ configuration file using the key/value pairs as options. 
client_id: "elk_metrics_6x" compression_type: "gzip" security_protocol: "SSL" + id: "UniqueOutputID" For a complete list of all options available within the Logstash Kafka output diff --git a/elk_metrics_6x/common_task_data_node_hosts.yml b/elk_metrics_6x/common_task_data_node_hosts.yml index f700a4df..ca60efbc 100644 --- a/elk_metrics_6x/common_task_data_node_hosts.yml +++ b/elk_metrics_6x/common_task_data_node_hosts.yml @@ -41,18 +41,16 @@ set_fact: data_nodes: "{{ (groups['elastic-logstash'][:master_node_count | int] + groups['elastic-logstash'][master_node_count | int::2]) }}" master_nodes: "{{ groups['elastic-logstash'][:master_node_count | int] }}" - coordination_nodes: |- - {% set nodes=[] %} - {% for host in groups['kibana'] %} - {% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (elastic_port | string))) %} - {% endfor %} - {{ nodes }} - zen_nodes: |- - {% set nodes=[] %} - {% for host in (groups['elastic-logstash'] | union(groups['kibana'])) %} - {% set _ = nodes.insert(loop.index, (hostvars[host]['ansible_host'] | string)) %} - {% endfor %} - {{ nodes }} + coordination_nodes: >- + {{ + (groups['kibana'] | map('extract', hostvars, 'ansible_host') | list) + | map('regex_replace', '(.*)' ,'\1:' ~ elastic_port) + | list + }} + zen_nodes: >- + {{ + (groups['elastic-logstash'] | union(groups['kibana'])) | map('extract', hostvars, 'ansible_host') | list + }} elasticserch_interface_speed: |- {% set default_interface_fact = hostvars[inventory_hostname]['ansible_' + (elastic_data_interface | replace('-', '_'))] %} {% set speeds = [] %} @@ -85,16 +83,28 @@ {% set _ = speeds.append(1000) %} {% endif %} {% endif %} - {{ ((speeds | min) * 0.75) | int }} + {% set interface_speed = ((speeds | min) * 0.20) | int %} + {{ ((interface_speed | int) > 750) | ternary(750, interface_speed) }} tags: - always +- name: Set data node details + set_fact: + elasticsearch_data_node_details: >- + {{ + (data_nodes | map('extract', hostvars, 
'ansible_host') | list) | map('regex_replace', '(.*)' ,'\1:' ~ elastic_port) | list + }} + logstash_data_node_details: >- + {{ + (data_nodes | map('extract', hostvars, 'ansible_host') | list) | map('regex_replace', '(.*)' ,'\1:' ~ logstash_beat_input_port) | list + }} + # based on the assignment of roles to hosts, set per host booleans - name: Node enablement set_fact: master_node: "{{ (inventory_hostname in master_nodes) | ternary(true, false) }}" data_node: "{{ (inventory_hostname in data_nodes) | ternary(true, false) }}" - elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 24) | ternary(24, ansible_processor_cores) }}" + elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) >= 24) | ternary(24, ansible_processor_cores) }}" tags: - always @@ -103,26 +113,18 @@ - name: Set data nodes set_fact: elasticsearch_data_hosts: |- + {% set nodes = elasticsearch_data_node_details %} {% if inventory_hostname in data_nodes %} - {% set data_hosts = ['127.0.0.1:' + (elastic_port | string)] %} - {% else %} - {% set nodes=[] %} - {% for host in data_nodes %} - {% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (elastic_port | string))) %} - {% endfor %} - {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %} + {% set _ = nodes.insert(0, '127.0.0.1:' ~ elastic_port) %} {% endif %} + {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %} {{ data_hosts }} logstash_data_hosts: |- + {% set nodes = logstash_data_node_details %} {% if inventory_hostname in data_nodes %} - {% set data_hosts = ['127.0.0.1:' + (logstash_beat_input_port | string)] %} - {% else %} - {% set nodes=[] %} - {% for host in data_nodes %} - {% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (logstash_beat_input_port | string))) %} - {% endfor %} - {% set data_hosts = nodes | shuffle(seed=inventory_hostname) %} + {% set _ = nodes.insert(0, '127.0.0.1:' ~ logstash_beat_input_port) %} {% endif %} + {% set 
data_hosts = nodes | shuffle(seed=inventory_hostname) %} {{ data_hosts }} tags: - always diff --git a/elk_metrics_6x/createElasticIndexes.yml b/elk_metrics_6x/createElasticIndexes.yml index 6930b8e9..c26d21a6 100644 --- a/elk_metrics_6x/createElasticIndexes.yml +++ b/elk_metrics_6x/createElasticIndexes.yml @@ -33,7 +33,8 @@ number_of_replicas: "1" - name: "_all/_settings?preserve_existing=true" index_options: - index.refresh_interval: "1m" + index.refresh_interval: "10s" - name: "_all/_settings?preserve_existing=true" index_options: - index.queries.cache.enabled: "false" + index.queries.cache.enabled: "true" + indices.queries.cache.size: "5%" diff --git a/elk_metrics_6x/installElastic.yml b/elk_metrics_6x/installElastic.yml index 02eac46c..46f8b7f1 100644 --- a/elk_metrics_6x/installElastic.yml +++ b/elk_metrics_6x/installElastic.yml @@ -1,6 +1,41 @@ --- -- name: Install Elastic Search + +- name: Run serialization detection hosts: "elastic-logstash:kibana" + gather_facts: true + + vars_files: + - vars/variables.yml + + tasks: + - include_tasks: common_task_data_node_hosts.yml + + - name: Group by stand alone masters + group_by: + key: elastic_masters + parents: elastic-logstash + when: + - inventory_hostname in master_nodes + + - name: Group by non stand alone masters + group_by: + key: elastic_non_masters + parents: elastic-logstash + when: + - inventory_hostname in (data_nodes | difference(master_nodes)) + + - name: Group by coordinators + group_by: + key: elastic_coordinators + parents: elastic-logstash + when: + - inventory_hostname in groups['kibana'] + tags: + - always + +- name: Install Elastic Search + hosts: "elastic_coordinators:elastic_masters:elastic_non_masters" + serial: "33%" become: true vars_files: @@ -12,11 +47,6 @@ environment: "{{ deployment_environment_variables | default({}) }}" - pre_tasks: - - include_tasks: common_task_data_node_hosts.yml - tags: - - always - tasks: - name: Set memory fact to half set_fact: @@ -40,10 +70,12 @@ 
elasticsearch_node_master: false elasticsearch_node_data: false elasticsearch_node_ingest: false + elastic_coordination_node: true elastic_heap_size: "{{ (elastic_heap_size | int) // 3 }}" - elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 4) | ternary(4, 1) }}" + elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 4) | ternary(4, (ansible_processor_cores // 2)) }}" when: - - inventory_hostname in (groups['kibana'] | difference(groups['elastic-logstash'])) + - inventory_hostname in (groups['kibana'] | default([])) and + not inventory_hostname in (groups['elastic-logstash'] | default([])) tags: - always diff --git a/elk_metrics_6x/installLogstash.yml b/elk_metrics_6x/installLogstash.yml index e1edfe90..8cebfeae 100644 --- a/elk_metrics_6x/installLogstash.yml +++ b/elk_metrics_6x/installLogstash.yml @@ -1,6 +1,7 @@ --- - name: Install Logstash hosts: elastic-logstash + serial: "50%" become: true vars_files: - vars/variables.yml @@ -18,7 +19,7 @@ tasks: - name: Set quarter memory fact set_fact: - q_mem: "{{ (ansible_memtotal_mb | int) // 4 }}" + q_mem: "{{ (ansible_memtotal_mb | int) // 3 }}" when: - q_mem is not defined tags: @@ -26,7 +27,7 @@ - name: Set processor cores fact set_fact: - q_storage: "{{ ansible_processor_cores }}" + q_storage: "{{ (ansible_processor_cores | int) * 2 }}" when: - q_storage is not defined tags: @@ -124,7 +125,7 @@ notify: - Enable and restart logstash - - name: Drop elasticsearch conf file + - name: Drop logstash conf file(s) template: src: "{{ item.src }}" dest: "{{ item.dest }}" diff --git a/elk_metrics_6x/setupAPMserver.yml b/elk_metrics_6x/setupAPMserver.yml index c2767fd3..ba3f76f1 100644 --- a/elk_metrics_6x/setupAPMserver.yml +++ b/elk_metrics_6x/setupAPMserver.yml @@ -2,7 +2,7 @@ - name: Load apm-server Dashboards hosts: apm-server[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git a/elk_metrics_6x/setupAuditbeat.yml b/elk_metrics_6x/setupAuditbeat.yml index 
b7c171a7..e5e100cc 100644 --- a/elk_metrics_6x/setupAuditbeat.yml +++ b/elk_metrics_6x/setupAuditbeat.yml @@ -2,7 +2,7 @@ - name: Load Auditbeat Dashboards hosts: hosts[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git a/elk_metrics_6x/setupFilebeat.yml b/elk_metrics_6x/setupFilebeat.yml index feb020a8..ec67d7e0 100644 --- a/elk_metrics_6x/setupFilebeat.yml +++ b/elk_metrics_6x/setupFilebeat.yml @@ -2,7 +2,7 @@ - name: Load Filebeat Dashboards hosts: hosts[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git a/elk_metrics_6x/setupHeartbeat.yml b/elk_metrics_6x/setupHeartbeat.yml index 4c7be4a7..9d648867 100644 --- a/elk_metrics_6x/setupHeartbeat.yml +++ b/elk_metrics_6x/setupHeartbeat.yml @@ -2,7 +2,7 @@ - name: Load Heartbeat Dashboards hosts: kibana[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git a/elk_metrics_6x/setupJournalbeat.yml b/elk_metrics_6x/setupJournalbeat.yml index 52248af3..ecf84f17 100644 --- a/elk_metrics_6x/setupJournalbeat.yml +++ b/elk_metrics_6x/setupJournalbeat.yml @@ -15,7 +15,7 @@ - name: Load Journalbeat Dashboards hosts: hosts[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git a/elk_metrics_6x/setupMetricbeat.yml b/elk_metrics_6x/setupMetricbeat.yml index 47741192..302b9321 100644 --- a/elk_metrics_6x/setupMetricbeat.yml +++ b/elk_metrics_6x/setupMetricbeat.yml @@ -2,7 +2,7 @@ - name: Load Metricsbeat Dashboards hosts: all[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git a/elk_metrics_6x/setupPacketbeat.yml b/elk_metrics_6x/setupPacketbeat.yml index 03440a48..8312a34e 100644 --- a/elk_metrics_6x/setupPacketbeat.yml +++ b/elk_metrics_6x/setupPacketbeat.yml @@ -2,7 +2,7 @@ - name: Load Packetbeat Dashboards hosts: hosts[0] - gather_facts: false + gather_facts: true vars_files: - vars/variables.yml diff --git 
a/elk_metrics_6x/templates/99-elasticsearch-output.conf.j2 b/elk_metrics_6x/templates/99-elasticsearch-output.conf.j2 index 1a27e0f8..bbc7046f 100644 --- a/elk_metrics_6x/templates/99-elasticsearch-output.conf.j2 +++ b/elk_metrics_6x/templates/99-elasticsearch-output.conf.j2 @@ -1,6 +1,16 @@ +filter { + fingerprint { + source => "message" + target => "[@metadata][fingerprint]" + method => "SHA1" + key => "{{ cluster_name | replace(' ', '_') }}" + base64encode => true + } +} output { if [@metadata][version] { elasticsearch { + document_id => "%{[@metadata][fingerprint]}" hosts => {{ elasticsearch_data_hosts | shuffle(seed=inventory_hostname) | to_json }} sniffing => {{ (not data_node | bool) | lower }} manage_template => {{ (data_node | bool) | lower }} @@ -8,6 +18,7 @@ output { } } else { elasticsearch { + document_id => "%{[@metadata][fingerprint]}" hosts => {{ elasticsearch_data_hosts | shuffle(seed=inventory_hostname) | to_json }} sniffing => {{ (not data_node | bool) | lower }} manage_template => {{ (data_node | bool) | lower }} diff --git a/elk_metrics_6x/templates/elasticsearch.yml.j2 b/elk_metrics_6x/templates/elasticsearch.yml.j2 index d49ffb6f..2fe647d4 100644 --- a/elk_metrics_6x/templates/elasticsearch.yml.j2 +++ b/elk_metrics_6x/templates/elasticsearch.yml.j2 @@ -101,26 +101,35 @@ gateway.recover_after_nodes: {{ ((master_node_count | int) // 2) + 1 }} # # action.destructive_requires_name: true +{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %} +{% if not (elastic_coordination_node | default(false)) | bool %} # Thread pool settings. 
For more on this see the documentation at: # thread_pool: - search: - size: {{ (elastic_thread_pool_size | int) }} - queue_size: {{ (elastic_thread_pool_size | int) * 256 }} index: - size: {{ (elastic_thread_pool_size | int) }} - queue_size: {{ (elastic_thread_pool_size | int) * 256 }} - bulk: - size: {{ (elastic_thread_pool_size | int) }} - queue_size: {{ (elastic_thread_pool_size | int) * 512 }} + queue_size: {{ (processors | int) * 256 }} + get: + queue_size: {{ (processors | int) * 256 }} + write: + queue_size: {{ (processors | int) * 512 }} +{% else %} +# The number of processors is automatically detected, and the thread pool +# settings are automatically set based on it. In some cases it can be useful to +# override the number of detected processors. This can be done by explicitly +# setting the processors setting. On Kibana hosts where elasticsearch is running +# as a coordination node, the processor count is limited. +processors: {{ processors }} +{% endif %} -# Accepts either a percentage or a byte size value. Set to 30%, meaning that 30% + +# Accepts either a percentage or a byte size value. Set to 20%, meaning that 20% # of the total heap allocated to a node will be used as the indexing buffer size # shared across all shards. -indices.memory.index_buffer_size: 30% +indices.memory.index_buffer_size: 20% -# Connection throttling on recovery is limited to 75% of the detected interface -# speed. This will improce search speeds and reduce general cluster pressure. +# Connection throttling on recovery is limited to 20% of the detected interface +# speed with a cap of 750mb. This will improve search speeds and reduce general +# cluster pressure. 
indices.recovery.max_bytes_per_sec: {{ elasticserch_interface_speed }}mb # ---------------------------------- X-Pack ------------------------------------ diff --git a/elk_metrics_6x/templates/jvm.options.j2 b/elk_metrics_6x/templates/jvm.options.j2 index 25954b0e..712699c7 100644 --- a/elk_metrics_6x/templates/jvm.options.j2 +++ b/elk_metrics_6x/templates/jvm.options.j2 @@ -1,14 +1,13 @@ ## JVM configuration - -# Xms represents the initial size of total heap space -# Xmx represents the maximum size of total heap space {% if (not (elasticsearch_node_master | default(master_node)) | bool) and (not (elasticsearch_node_data | default(data_node)) | bool) %} --Xms{{ (elastic_heap_size | int) // 2 }}m --Xmx{{ (elastic_heap_size | int) // 2 }}m +{% set heap_size = (elastic_heap_size | int) // 2 %} {% else %} --Xms{{ elastic_heap_size }}m --Xmx{{ elastic_heap_size }}m +{% set heap_size = (elastic_heap_size | int) %} {% endif %} +# Xms represents the initial size of total heap space +-Xms{{ heap_size }}m +# Xmx represents the maximum size of total heap space +-Xmx{{ heap_size }}m ################################################################ @@ -21,11 +20,17 @@ ## ################################################################ -## GC configuration +## GC Configuration +{% if ((heap_size | int) > 4096) and (elastic_g1gc_enabled | bool) %} +-XX:+UseG1GC +-XX:MaxGCPauseMillis=400 +-XX:InitiatingHeapOccupancyPercent=75 +{% else %} -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly +{% endif %} ## optimizations diff --git a/elk_metrics_6x/templates/logstash.yml.j2 b/elk_metrics_6x/templates/logstash.yml.j2 index 4a15be74..1fc02e2a 100644 --- a/elk_metrics_6x/templates/logstash.yml.j2 +++ b/elk_metrics_6x/templates/logstash.yml.j2 @@ -38,16 +38,17 @@ path.data: /var/lib/logstash # # This defaults to the number of the host's CPU cores. 
# -# pipeline.workers: 2 +{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %} +pipeline.workers: {{ processors | int }} # # How many events to retrieve from inputs before sending to filters+workers # -# pipeline.batch.size: 125 +pipeline.batch.size: 256 # # How long to wait in milliseconds while polling for the next event # before dispatching an undersized batch to filters+outputs # -# pipeline.batch.delay: 50 +pipeline.batch.delay: 20 # # Force Logstash to exit during shutdown even if there are still inflight # events in memory. By default, logstash will refuse to quit until all diff --git a/elk_metrics_6x/vars/variables.yml b/elk_metrics_6x/vars/variables.yml index 78e9afc0..2b481dd8 100644 --- a/elk_metrics_6x/vars/variables.yml +++ b/elk_metrics_6x/vars/variables.yml @@ -68,6 +68,11 @@ elastic_vip_url: >- # path: "/elastic-backup" # state: mounted +# EXPERIMENTAL - When the heap size for a given elastic node is greater than +# 4GiB the G1 garbage collector can be enabled. This is an +# experimental feature and may be removed later. +elastic_g1gc_enabled: false + # kibana vars kibana_interface: 0.0.0.0 kibana_port: 5601