Improve logstash and elasticsearch performance

The logstash and elasticsearch performance can be improved by using
async index options, pulling back the refresh interval, and by not
fingerprinting every document.

* Async translog allows elasticsearch to run fsync in the
  background instead of blocking.
* The refresh interval will now be 5x the number of replicas with a cap
  of 30. This integer represents the seconds between index refresh
  calls, which greatly lowers the load generated across the
  cluster.
* All documents were fingerprinted before writing to the cluster. This
  was a costly operation as elasticsearch will do a forward lookup on all
  documents with a preset ID, resulting in 100s, if not 1000s, of extra
  reads. The purpose of the fingerprint function is to limit repeated
  writes, so to keep some of this functionality the fingerprint function
  is now only added to documents with messages.
* G1 garbage collection is now enabled by default when the heap size is
  > 6GiB. Early versions of elasticsearch did not recommend this setting;
  however, it has since stabilized in recent releases.
* JVM options have been moved into the elasticsearch and logstash roles
  allowing these tasks to trigger service restarts when changes are made.

Change-Id: I805129b207ad4db182ae6e59b6ec78eb3e246b54
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
This commit is contained in:
Kevin Carter 2018-09-21 20:06:52 -05:00
parent daffc177a1
commit 814622cc6c
No known key found for this signature in database
GPG Key ID: 9443251A787B9FB3
9 changed files with 107 additions and 51 deletions

View File

@ -15,6 +15,9 @@
hosts: "elastic-logstash[0]"
become: true
vars:
elastic_refresh_interval: "{{ (elasticsearch_number_of_replicas | int) * 5 }}"
vars_files:
- vars/variables.yml
@ -45,9 +48,6 @@
total_fields:
limit: "10000"
refresh_interval: "5s"
- name: "_all/_settings?preserve_existing=true"
index_options:
index.refresh_interval: "10s"
- name: "_all/_settings?preserve_existing=true"
index_options:
index.queries.cache.enabled: "true"
@ -55,6 +55,8 @@
- name: "_all/_settings"
index_options:
index.number_of_replicas: "{{ elasticsearch_number_of_replicas | int }}"
index.translog.durability: "async"
index.refresh_interval: "{{ ((elastic_refresh_interval | int) > 30) | ternary(30, elastic_refresh_interval) }}s"
- name: Check for basic index template
uri:

View File

@ -31,7 +31,6 @@ h_mem: "{{ (ansible_memtotal_mb | int) // 2 }}"
# path: "/elastic-backup"
# state: mounted
# EXPERIMENTAL - When the heap size for a given elastic node is graeter than
# 4GiB the G1 garbage collector can be enabled. This is an
# experimental feature and may be removed later.
elastic_g1gc_enabled: false
# NOTE(cloudnull) - When the heap size for a given elastic node is greater than
# 6GiB the G1 garbage collector can be enabled.
elastic_g1gc_enabled: true

View File

@ -192,13 +192,11 @@
owner: "{{ service_owner }}"
group: "{{ service_group }}"
- name: Drop jvm conf file(s)
- name: Drop logrotate conf file(s)
template:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
with_items:
- src: "jvm.options.j2"
dest: "/etc/{{ service_name }}/jvm.options"
- src: "templates/logrotate.j2"
dest: "/etc/logrotate.d/{{ service_name }}"

View File

@ -62,6 +62,16 @@
notify:
- Enable and restart logstash
# Render the jvm.options.j2 template to /etc/logstash/jvm.options. The
# task notifies the "Enable and restart logstash" handler so that a change
# to the JVM options results in a logstash service restart.
- name: Drop jvm conf file(s)
template:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
with_items:
- src: "jvm.options.j2"
dest: "/etc/logstash/jvm.options"
notify:
- Enable and restart logstash
- name: Check queue type
block:
- name: Get block device for logstash

View File

@ -238,7 +238,7 @@ xpack.monitoring.elasticsearch.url: ["127.0.0.1:9200"]
#xpack.monitoring.elasticsearch.ssl.keystore.path: /path/to/file
#xpack.monitoring.elasticsearch.ssl.keystore.password: password
#xpack.monitoring.elasticsearch.ssl.verification_mode: certificate
xpack.monitoring.elasticsearch.sniffing: false
xpack.monitoring.elasticsearch.sniffing: {{ elastic_sniffing_enabled | default(false) }}
xpack.monitoring.collection.interval: 30s
xpack.monitoring.collection.pipeline.details.enabled: true
#
@ -255,5 +255,5 @@ xpack.monitoring.collection.pipeline.details.enabled: true
#xpack.management.elasticsearch.ssl.truststore.password: password
#xpack.management.elasticsearch.ssl.keystore.path: /path/to/file
#xpack.management.elasticsearch.ssl.keystore.password: password
#xpack.management.elasticsearch.sniffing: false
#xpack.management.elasticsearch.sniffing: {{ elastic_sniffing_enabled | default(false) }}
#xpack.management.logstash.poll_interval: 5s

View File

@ -68,6 +68,16 @@
tags:
- config
# Render the jvm.options.j2 template to /etc/elasticsearch/jvm.options. The
# task notifies the "Enable and restart elastic" handler so that a change
# to the JVM options results in an elasticsearch service restart.
- name: Drop jvm conf file(s)
template:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
with_items:
- src: "jvm.options.j2"
dest: "/etc/elasticsearch/jvm.options"
notify:
- Enable and restart elastic
- name: Drop elasticsearch conf file
template:
src: "{{ item.src }}"

View File

@ -22,7 +22,7 @@
################################################################
## GC Configuration
{% if ((heap_size | int) > 4096) and (elastic_g1gc_enabled | bool) %}
{% if ((heap_size | int) > 6144) and (elastic_g1gc_enabled | bool) %}
-XX:+UseG1GC
-XX:MaxGCPauseMillis=400
-XX:InitiatingHeapOccupancyPercent=75

View File

@ -439,51 +439,83 @@
method => "SHA1"
key => "{{ inventory_hostname | to_uuid }}"
}
} else {
fingerprint {
target => "[@metadata][fingerprint]"
method => "UUID"
}
}
}
output {
if [@metadata][version] {
elasticsearch {
id => "elasticsearchOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (not data_node | bool) | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
} else if [@metadata][beat] {
elasticsearch {
id => "elasticsearchLegacyOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (not data_node | bool) | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
}
} else if "syslog" in [tags] {
elasticsearch {
id => "elasticsearchSyslogOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (not data_node | bool) | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "syslog-%{+YYYY.MM.dd}"
if [@metadata][fingerprint] {
if [@metadata][version] {
elasticsearch {
id => "elasticsearchDocIDOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
} else if [@metadata][beat] {
elasticsearch {
id => "elasticsearchLegacyDocIDOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
}
} else if "syslog" in [tags] {
elasticsearch {
id => "elasticsearchSyslogDocIDOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "syslog-%{+YYYY.MM.dd}"
}
} else {
elasticsearch {
id => "elasticsearchUndefinedDocIDOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "undefined-%{+YYYY.MM.dd}"
}
}
} else {
elasticsearch {
id => "elasticsearchUndefinedOutputPipeline"
document_id => "%{[@metadata][fingerprint]}"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (not data_node | bool) | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "undefined-%{+YYYY.MM.dd}"
if [@metadata][version] {
elasticsearch {
id => "elasticsearchOutputPipeline"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
} else if [@metadata][beat] {
elasticsearch {
id => "elasticsearchLegacyOutputPipeline"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
}
} else if "syslog" in [tags] {
elasticsearch {
id => "elasticsearchSyslogOutputPipeline"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "syslog-%{+YYYY.MM.dd}"
}
} else {
elasticsearch {
id => "elasticsearchUndefinedOutputPipeline"
hosts => ["{{ '127.0.0.1:' ~ elastic_port }}"]
sniffing => {{ (elastic_sniffing_enabled | default(not data_node)) | bool | string | lower }}
manage_template => {{ (data_node | bool) | lower }}
index => "undefined-%{+YYYY.MM.dd}"
}
}
}
{% if logstash_kafka_options is defined %}
kafka {
{% for key, value in logstash_kafka_options.items() %}

View File

@ -26,6 +26,11 @@ elastic_vip_url: >-
http://{{ hostvars[groups['kibana'][0]]['ansible_host'] ~ ':' ~ elastic_port }}
{% endif %}
# Elasticsearch can query itself and load-balance requests across the cluster.
# This function is automatically enabled on non-data nodes however this setting
# can be used to override the default behaviour.
#elastic_sniffing_enabled: true
# Beat options
heartbeat_services:
- group: "{{ groups['galera_all'] | default([]) }}"