From 814622cc6c0435fcaf9bb00c6a1ffc5fb2cd1b54 Mon Sep 17 00:00:00 2001 From: Kevin Carter Date: Fri, 21 Sep 2018 20:06:52 -0500 Subject: [PATCH] Improve logstash and elasticsearch performance The logstash and elasticsearch performance can be improved by using async index options, pulling back the refresh interval, and by not fingerprinting every document. * Async translog allows elasticsearch to run fsync in the background instead of blocking. * The refresh interval will now be 5x the number of replicas with a cap of 30. This integer is representative of the seconds between index refresh calls which greatly lowers the load generated across the cluster. * All documents were fingerprinted before writing to the cluster. This was a costly operation as elasticsearch will do a forward lookup on all documents with a preset ID resulting in 100s, if not 1000s, of extra reads. The purpose of the fingerprint function is to limit repeated writes, so to keep some of this functionality the fingerprint function is now only added to documents with messages. * G1 garbage collection is now enabled by default when the heap size is > 6GiB. Early versions of elasticsearch did not recommend this setting; however, it has since stabilized in recent releases. * JVM options have been moved into the elasticsearch and logstash roles allowing these tasks to trigger service restarts when changes are made. 
Change-Id: I805129b207ad4db182ae6e59b6ec78eb3e246b54 Signed-off-by: Kevin Carter --- elk_metrics_6x/createElasticIndexes.yml | 8 +- .../elastic_dependencies/defaults/main.yml | 7 +- .../roles/elastic_dependencies/tasks/main.yml | 4 +- .../roles/elastic_logstash/tasks/main.yml | 10 ++ .../templates/logstash.yml.j2 | 4 +- .../roles/elasticsearch/tasks/main.yml | 10 ++ .../templates/jvm.options.j2 | 2 +- .../templates/logstash-pipelines.yml.j2 | 108 ++++++++++++------ elk_metrics_6x/vars/variables.yml | 5 + 9 files changed, 107 insertions(+), 51 deletions(-) rename elk_metrics_6x/{roles/elastic_dependencies => }/templates/jvm.options.j2 (97%) diff --git a/elk_metrics_6x/createElasticIndexes.yml b/elk_metrics_6x/createElasticIndexes.yml index a7e43bad..571df62d 100644 --- a/elk_metrics_6x/createElasticIndexes.yml +++ b/elk_metrics_6x/createElasticIndexes.yml @@ -15,6 +15,9 @@ hosts: "elastic-logstash[0]" become: true + vars: + elastic_refresh_interval: "{{ (elasticsearch_number_of_replicas | int) * 5 }}" + vars_files: - vars/variables.yml @@ -45,9 +48,6 @@ total_fields: limit: "10000" refresh_interval: "5s" - - name: "_all/_settings?preserve_existing=true" - index_options: - index.refresh_interval: "10s" - name: "_all/_settings?preserve_existing=true" index_options: index.queries.cache.enabled: "true" @@ -55,6 +55,8 @@ - name: "_all/_settings" index_options: index.number_of_replicas: "{{ elasticsearch_number_of_replicas | int }}" + index.translog.durability: "async" + index.refresh_interval: "{{ ((elastic_refresh_interval | int) > 30) | ternary(30, elastic_refresh_interval) }}s" - name: Check for basic index template uri: diff --git a/elk_metrics_6x/roles/elastic_dependencies/defaults/main.yml b/elk_metrics_6x/roles/elastic_dependencies/defaults/main.yml index 8db0f2c9..55d1a981 100644 --- a/elk_metrics_6x/roles/elastic_dependencies/defaults/main.yml +++ b/elk_metrics_6x/roles/elastic_dependencies/defaults/main.yml @@ -31,7 +31,6 @@ h_mem: "{{ (ansible_memtotal_mb | 
int) // 2 }}" # path: "/elastic-backup" # state: mounted -# EXPERIMENTAL - When the heap size for a given elastic node is graeter than -# 4GiB the G1 garbage collector can be enabled. This is an -# experimental feature and may be removed later. -elastic_g1gc_enabled: false +# NOTE(cloudnull) - When the heap size for a given elastic node is greater than +# 6GiB the G1 garbage collector can be enabled. +elastic_g1gc_enabled: true diff --git a/elk_metrics_6x/roles/elastic_dependencies/tasks/main.yml b/elk_metrics_6x/roles/elastic_dependencies/tasks/main.yml index 85158bee..1ee64e4a 100644 --- a/elk_metrics_6x/roles/elastic_dependencies/tasks/main.yml +++ b/elk_metrics_6x/roles/elastic_dependencies/tasks/main.yml @@ -192,13 +192,11 @@ owner: "{{ service_owner }}" group: "{{ service_group }}" -- name: Drop jvm conf file(s) +- name: Drop logrotate conf file(s) template: src: "{{ item.src }}" dest: "{{ item.dest }}" with_items: - - src: "jvm.options.j2" - dest: "/etc/{{ service_name }}/jvm.options" - src: "templates/logrotate.j2" dest: "/etc/logrotate.d/{{ service_name }}" diff --git a/elk_metrics_6x/roles/elastic_logstash/tasks/main.yml b/elk_metrics_6x/roles/elastic_logstash/tasks/main.yml index 41bf570c..1d7863da 100644 --- a/elk_metrics_6x/roles/elastic_logstash/tasks/main.yml +++ b/elk_metrics_6x/roles/elastic_logstash/tasks/main.yml @@ -62,6 +62,16 @@ notify: - Enable and restart logstash +- name: Drop jvm conf file(s) + template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + with_items: + - src: "jvm.options.j2" + dest: "/etc/logstash/jvm.options" + notify: + - Enable and restart logstash + - name: Check queue type block: - name: Get block device for logstash diff --git a/elk_metrics_6x/roles/elastic_logstash/templates/logstash.yml.j2 b/elk_metrics_6x/roles/elastic_logstash/templates/logstash.yml.j2 index b3f5de2e..dbcefd51 100644 --- a/elk_metrics_6x/roles/elastic_logstash/templates/logstash.yml.j2 +++ 
b/elk_metrics_6x/roles/elastic_logstash/templates/logstash.yml.j2 @@ -238,7 +238,7 @@ xpack.monitoring.elasticsearch.url: ["127.0.0.1:9200"] #xpack.monitoring.elasticsearch.ssl.keystore.path: /path/to/file #xpack.monitoring.elasticsearch.ssl.keystore.password: password #xpack.monitoring.elasticsearch.ssl.verification_mode: certificate -xpack.monitoring.elasticsearch.sniffing: false +xpack.monitoring.elasticsearch.sniffing: {{ elastic_sniffing_enabled | default(false) }} xpack.monitoring.collection.interval: 30s xpack.monitoring.collection.pipeline.details.enabled: true # @@ -255,5 +255,5 @@ xpack.monitoring.collection.pipeline.details.enabled: true #xpack.management.elasticsearch.ssl.truststore.password: password #xpack.management.elasticsearch.ssl.keystore.path: /path/to/file #xpack.management.elasticsearch.ssl.keystore.password: password -#xpack.management.elasticsearch.sniffing: false +#xpack.management.elasticsearch.sniffing: {{ elastic_sniffing_enabled | default(false) }} #xpack.management.logstash.poll_interval: 5s diff --git a/elk_metrics_6x/roles/elasticsearch/tasks/main.yml b/elk_metrics_6x/roles/elasticsearch/tasks/main.yml index 9d277475..31962d02 100644 --- a/elk_metrics_6x/roles/elasticsearch/tasks/main.yml +++ b/elk_metrics_6x/roles/elasticsearch/tasks/main.yml @@ -68,6 +68,16 @@ tags: - config +- name: Drop jvm conf file(s) + template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + with_items: + - src: "jvm.options.j2" + dest: "/etc/elasticsearch/jvm.options" + notify: + - Enable and restart elastic + - name: Drop elasticsearch conf file template: src: "{{ item.src }}" diff --git a/elk_metrics_6x/roles/elastic_dependencies/templates/jvm.options.j2 b/elk_metrics_6x/templates/jvm.options.j2 similarity index 97% rename from elk_metrics_6x/roles/elastic_dependencies/templates/jvm.options.j2 rename to elk_metrics_6x/templates/jvm.options.j2 index 6ec918d1..6ccde2d8 100644 --- a/elk_metrics_6x/roles/elastic_dependencies/templates/jvm.options.j2 +++ 
b/elk_metrics_6x/templates/jvm.options.j2 @@ -22,7 +22,7 @@ ################################################################ ## GC Configuration -{% if ((heap_size | int) > 4096) and (elastic_g1gc_enabled | bool) %} +{% if ((heap_size | int) > 6144) and (elastic_g1gc_enabled | bool) %} -XX:+UseG1GC -XX:MaxGCPauseMillis=400 -XX:InitiatingHeapOccupancyPercent=75 diff --git a/elk_metrics_6x/templates/logstash-pipelines.yml.j2 b/elk_metrics_6x/templates/logstash-pipelines.yml.j2 index 1d08d603..b10f07d8 100644 --- a/elk_metrics_6x/templates/logstash-pipelines.yml.j2 +++ b/elk_metrics_6x/templates/logstash-pipelines.yml.j2 @@ -439,51 +439,83 @@ method => "SHA1" key => "{{ inventory_hostname | to_uuid }}" } - } else { - fingerprint { - target => "[@metadata][fingerprint]" - method => "UUID" - } } } output { - if [@metadata][version] { - elasticsearch { - id => "elasticsearchOutputPipeline" - document_id => "%{[@metadata][fingerprint]}" - hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] - sniffing => {{ (not data_node | bool) | lower }} - manage_template => {{ (data_node | bool) | lower }} - index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}" - } - } else if [@metadata][beat] { - elasticsearch { - id => "elasticsearchLegacyOutputPipeline" - document_id => "%{[@metadata][fingerprint]}" - hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] - sniffing => {{ (not data_node | bool) | lower }} - manage_template => {{ (data_node | bool) | lower }} - index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}" - } - } else if "syslog" in [tags] { - elasticsearch { - id => "elasticsearchSyslogOutputPipeline" - document_id => "%{[@metadata][fingerprint]}" - hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] - sniffing => {{ (not data_node | bool) | lower }} - manage_template => {{ (data_node | bool) | lower }} - index => "syslog-%{+YYYY.MM.dd}" + if [@metadata][fingerprint] { + if [@metadata][version] { + elasticsearch { + id => "elasticsearchDocIDOutputPipeline" + document_id => 
"%{[@metadata][fingerprint]}" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}" + } + } else if [@metadata][beat] { + elasticsearch { + id => "elasticsearchLegacyDocIDOutputPipeline" + document_id => "%{[@metadata][fingerprint]}" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}" + } + } else if "syslog" in [tags] { + elasticsearch { + id => "elasticsearchSyslogDocIDOutputPipeline" + document_id => "%{[@metadata][fingerprint]}" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "syslog-%{+YYYY.MM.dd}" + } + } else { + elasticsearch { + id => "elasticsearchUndefinedDocIDOutputPipeline" + document_id => "%{[@metadata][fingerprint]}" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "undefined-%{+YYYY.MM.dd}" + } } } else { - elasticsearch { - id => "elasticsearchUndefinedOutputPipeline" - document_id => "%{[@metadata][fingerprint]}" - hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] - sniffing => {{ (not data_node | bool) | lower }} - manage_template => {{ (data_node | bool) | lower }} - index => "undefined-%{+YYYY.MM.dd}" + if [@metadata][version] { + elasticsearch { + id => "elasticsearchOutputPipeline" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower 
}} + manage_template => {{ (data_node | bool) | lower }} + index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}" + } + } else if [@metadata][beat] { + elasticsearch { + id => "elasticsearchLegacyOutputPipeline" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}" + } + } else if "syslog" in [tags] { + elasticsearch { + id => "elasticsearchSyslogOutputPipeline" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "syslog-%{+YYYY.MM.dd}" + } + } else { + elasticsearch { + id => "elasticsearchUndefinedOutputPipeline" + hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"] + sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }} + manage_template => {{ (data_node | bool) | lower }} + index => "undefined-%{+YYYY.MM.dd}" + } } } + {% if logstash_kafka_options is defined %} kafka { {% for key, value in logstash_kafka_options.items() %} diff --git a/elk_metrics_6x/vars/variables.yml b/elk_metrics_6x/vars/variables.yml index f3970e80..0407ac6e 100644 --- a/elk_metrics_6x/vars/variables.yml +++ b/elk_metrics_6x/vars/variables.yml @@ -26,6 +26,11 @@ elastic_vip_url: >- http://{{ hostvars[groups['kibana'][0]]['ansible_host'] ~ ':' ~ elastic_port }} {% endif %} +# Elasticsearch can query itself and load-balance requests across the cluster. +# This function is automatically enabled on non-data nodes; however, this setting +# can be used to override the default behaviour. +#elastic_sniffing_enabled: true + # Beat options heartbeat_services: - group: "{{ groups['galera_all'] | default([]) }}"