Steve Wilkerson bc20c6c8b6 Elasticsearch: Add cron job to verify snapshot repositories
This adds a cron job to manually verify all snapshot repositories
are registered to any active master and data nodes. This is to
address scenarios where master and data nodes do not have the
desired snapshot repositories registered following node outages
or reboots

Change-Id: Ie6f42e95c3ca4dc2ec70f2852a2bde11e59ec097
Signed-off-by: Steve Wilkerson <sw5822@att.com>
2019-08-02 02:02:14 +00:00

873 lines
24 KiB
YAML

# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for elasticsearch
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
images:
tags:
apache_proxy: docker.io/httpd:2.4
memory_init: docker.io/openstackhelm/heat:newton-ubuntu_xenial
curator: docker.io/bobrik/curator:5.6.0
elasticsearch: docker.io/openstackhelm/elasticsearch-s3:latest-5_6_4
ceph_key_placement: docker.io/openstackhelm/ceph-config-helper:latest-ubuntu_xenial
s3_bucket: docker.io/openstackhelm/ceph-daemon:latest-ubuntu_xenial
s3_user: docker.io/openstackhelm/ceph-config-helper:latest-ubuntu_xenial
helm_tests: docker.io/openstackhelm/heat:newton-ubuntu_xenial
prometheus_elasticsearch_exporter: docker.io/justwatch/elasticsearch_exporter:1.0.1
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
snapshot_repository: docker.io/openstackhelm/ceph-config-helper:latest-ubuntu_xenial
es_cluster_wait: docker.io/openstackhelm/ceph-config-helper:latest-ubuntu_xenial
elasticsearch_templates: docker.io/openstackhelm/heat:newton
image_repo_sync: docker.io/docker:17.07.0
pull_policy: "IfNotPresent"
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
elasticsearch:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- elasticsearch-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
curator:
services:
- endpoint: internal
service: elasticsearch
- endpoint: data
service: elasticsearch
- endpoint: discovery
service: elasticsearch
jobs:
- elasticsearch-register-snapshot-repository
elasticsearch_client:
services:
- endpoint: discovery
service: elasticsearch
jobs: null
elasticsearch_data:
services:
- endpoint: internal
service: elasticsearch
- endpoint: discovery
service: elasticsearch
jobs: null
elasticsearch_master:
services: null
jobs: null
es_cluster_wait:
services:
- endpoint: internal
service: elasticsearch
elasticsearch_templates:
services: null
jobs:
- elasticsearch-cluster-wait
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
prometheus_elasticsearch_exporter:
services:
- endpoint: internal
service: elasticsearch
snapshot_repository:
services: null
jobs:
- elasticsearch-s3-bucket
- elasticsearch-cluster-wait
verify_repositories:
services: null
jobs:
- elasticsearch-register-snapshot-repository
s3_user:
services:
- endpoint: internal
service: ceph_object_store
s3_bucket:
jobs:
- elasticsearch-s3-user
tests:
services: null
jobs:
- elasticsearch-register-snapshot-repository
pod:
mandatory_access_control:
type: apparmor
elasticsearch-master:
elasticsearch-master: localhost/docker-default
elasticsearch-data:
elasticsearch-data: localhost/docker-default
elasticsearch-client:
elasticsearch-client: localhost/docker-default
security_context:
exporter:
pod:
runAsUser: 99
container:
elasticsearch_exporter:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
client:
pod:
runAsUser: 0
container:
memory_map_increase:
privileged: true
readOnlyRootFilesystem: true
apache_proxy:
readOnlyRootFilesystem: false
elasticsearch_client:
privileged: true
capabilities:
add:
- IPC_LOCK
- SYS_RESOURCE
readOnlyRootFilesystem: true
master:
pod:
runAsUser: 0
container:
memory_map_increase:
privileged: true
readOnlyRootFilesystem: true
elasticsearch_master:
privileged: true
capabilities:
add:
- IPC_LOCK
- SYS_RESOURCE
readOnlyRootFilesystem: true
es_cluster_wait:
pod:
runAsUser: 0
container:
elasticsearch_cluster_wait:
readOnlyRootFilesystem: true
snapshot_repository:
pod:
runAsUser: 0
container:
register_snapshot_repository:
readOnlyRootFilesystem: true
test:
pod:
runAsUser: 0
container:
helm_test:
readOnlyRootFilesystem: true
data:
pod:
runAsUser: 0
container:
memory_map_increase:
privileged: true
readOnlyRootFilesystem: true
elasticsearch_data:
privileged: true
capabilities:
add:
- IPC_LOCK
- SYS_RESOURCE
# NOTE: This was changed from true to false to account for
# recovery scenarios when the data pods are unexpectedly lost due to
# node outages and shard/index recovery is required
readOnlyRootFilesystem: false
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
replicas:
master: 3
data: 3
client: 3
lifecycle:
upgrades:
statefulsets:
pod_replacement_strategy: RollingUpdate
deployments:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
master:
timeout: 600
data:
timeout: 1200
client:
timeout: 600
prometheus_elasticsearch_exporter:
timeout: 600
mounts:
elasticsearch:
elasticsearch:
elasticsearch_templates:
elasticsearch_templates:
resources:
enabled: false
apache_proxy:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "100m"
client:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
master:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
data:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
prometheus_elasticsearch_exporter:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
jobs:
curator:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
elasticsearch_templates:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
snapshot_repository:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
es_cluster_wait:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
storage_init:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
s3_bucket:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
s3_user:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
tests:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
network_policy:
elasticsearch:
ingress:
- {}
egress:
- {}
prometheus-elasticsearch-exporter:
ingress:
- {}
egress:
- {}
secrets:
rgw:
admin: radosgw-s3-admin-creds
elasticsearch: elasticsearch-s3-user-creds
elasticsearch:
user: elasticsearch-user-secrets
tls:
elasticsearch:
elasticsearch:
public: elasticsearch-tls-public
jobs:
curator:
cron: "* */6 * * *"
history:
success: 3
failed: 1
es_cluster_wait:
backoffLimit: 6
activeDeadlineSeconds: 600
snapshot_repository:
backoffLimit: 6
activeDeadlineSeconds: 600
verify_repositories:
cron: "*/30 * * * *"
history:
success: 3
failed: 1
conf:
httpd: |
ServerRoot "/usr/local/apache2"
Listen 80
LoadModule allowmethods_module modules/mod_allowmethods.so
LoadModule mpm_event_module modules/mod_mpm_event.so
LoadModule authn_file_module modules/mod_authn_file.so
LoadModule authn_core_module modules/mod_authn_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
LoadModule authz_user_module modules/mod_authz_user.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule auth_basic_module modules/mod_auth_basic.so
LoadModule ldap_module modules/mod_ldap.so
LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
LoadModule reqtimeout_module modules/mod_reqtimeout.so
LoadModule filter_module modules/mod_filter.so
LoadModule proxy_html_module modules/mod_proxy_html.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule env_module modules/mod_env.so
LoadModule headers_module modules/mod_headers.so
LoadModule setenvif_module modules/mod_setenvif.so
LoadModule version_module modules/mod_version.so
LoadModule proxy_module modules/mod_proxy.so
LoadModule proxy_connect_module modules/mod_proxy_connect.so
LoadModule proxy_http_module modules/mod_proxy_http.so
LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule status_module modules/mod_status.so
LoadModule autoindex_module modules/mod_autoindex.so
<IfModule unixd_module>
User daemon
Group daemon
</IfModule>
<Directory />
AllowOverride none
Require all denied
</Directory>
<Files ".ht*">
Require all denied
</Files>
ErrorLog /dev/stderr
LogLevel warn
<IfModule log_config_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
LogFormat "%h %l %u %t \"%r\" %>s %b" common
<IfModule logio_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
</IfModule>
SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
CustomLog /dev/stdout common
CustomLog /dev/stdout combined
CustomLog /dev/stdout proxy env=forwarded
</IfModule>
<Directory "/usr/local/apache2/cgi-bin">
AllowOverride None
Options None
Require all granted
</Directory>
<IfModule headers_module>
RequestHeader unset Proxy early
</IfModule>
<IfModule proxy_html_module>
Include conf/extra/proxy-html.conf
</IfModule>
<VirtualHost *:80>
<Location />
ProxyPass http://localhost:{{ tuple "elasticsearch" "internal" "client" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
ProxyPassReverse http://localhost:{{ tuple "elasticsearch" "internal" "client" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
AuthName "Elasticsearch"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
# Restrict access to the Elasticsearch Update API endpoint to prevent modification of indexed documents
<Location /*/_doc/*/_update*>
Require all denied
</Location>
# Restrict access to the Elasticsearch Update By Query API Endpoint to prevent modification of indexed documents
<Location /*/_update_by_query*>
Require all denied
</Location>
# Restrict access to the Elasticsearch Delete By Query API Endpoint to prevent deletion of indexed documents
<Location /*/_delete_by_query*>
Require all denied
</Location>
# Prohibit DELETE methods on the document API endpoint
<Location /*/_doc/*>
AllowMethods GET POST OPTIONS
ProxyPass http://localhost:{{ tuple "elasticsearch" "internal" "client" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
ProxyPassReverse http://localhost:{{ tuple "elasticsearch" "internal" "client" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
AuthName "Elasticsearch"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
</VirtualHost>
log4j2: |
status = error
appender.console.type = Console
appender.console.name = console
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] %marker%m%n
rootLogger.level = info
rootLogger.appenderRef.console.ref = console
init:
max_map_count: 262144
ceph:
admin_keyring: null
curator:
action_file:
# Remember, leave a key empty if there is no value. None will be a string,
# not a Python "NoneType"
#
# Also remember that all examples have 'disable_action' set to True. If you
# want to use this action as a template, be sure to set this to False after
# copying it.
#
# NOTE(srwilkers): The list of actions below is kept empty, and should be
# driven purely by overrides. As these items are injected as pure YAML,
# the desired configuration should include all fields as to avoid unwanted
# merges with a set of dummy default values. The supplied values can be
# used as an example
actions:
# 1:
# action: delete_indices
# description: >-
# "Delete indices older than 7 days"
# options:
# timeout_override:
# continue_if_exception: False
# ignore_empty_list: True
# disable_action: True
# filters:
# - filtertype: pattern
# kind: prefix
# value: logstash-
# - filtertype: age
# source: name
# direction: older
# timestring: '%Y.%m.%d'
# unit: days
# unit_count: 7
# 2:
# action: delete_indices
# description: >-
# "Delete indices by age if available disk space is
# less than 80% total disk"
# options:
# timeout_override: 600
# continue_if_exception: False
# ignore_empty_list: True
# disable_action: True
# filters:
# - filtertype: pattern
# kind: prefix
# value: logstash-
# - filtertype: space
# source: creation_date
# use_age: True
# # This space assumes the default PVC size of 5Gi times three data
# # replicas. This must be adjusted if changed due to Curator being
# # unable to calculate percentages of total disk space
# disk_space: 12
# 3:
# action: snapshot
# description: >-
# "Snapshot indices older than one day"
# options:
# repository: logstash_snapshots
# # Leaving this blank results in the default name format
# name:
# wait_for_completion: True
# max_wait: 3600
# wait_interval: 10
# timeout_override: 600
# ignore_empty_list: True
# continue_if_exception: False
# disable_action: True
# filters:
# - filtertype: age
# source: name
# direction: older
# timestring: '%Y.%m.%d'
# unit: days
# unit_count: 1
# 4:
# action: delete_snapshots
# description: >-
# "Delete snapshots older than 30 days"
# options:
# repository: logstash_snapshots
# disable_action: True
# timeout_override: 600
# ignore_empty_list: True
# filters:
# - filtertype: pattern
# kind: prefix
# value: curator-
# exclude:
# - filtertype: age
# source: creation_date
# direction: older
# unit: days
# unit_count: 30
config:
# Remember, leave a key empty if there is no value. None will be a string,
# not a Python "NoneType"
client:
hosts:
- ${ELASTICSEARCH_HOST}
use_ssl: False
ssl_no_validate: False
timeout: 60
logging:
loglevel: INFO
logformat: logstash
blacklist: ['elasticsearch', 'urllib3']
elasticsearch:
config:
bootstrap:
memory_lock: true
cluster:
name: elasticsearch
discovery:
zen:
ping.unicast.hosts: ${DISCOVERY_SERVICE}
minimum_master_nodes: 2
http:
enabled: ${HTTP_ENABLE}
compression: true
network:
host: 0.0.0.0
cloud:
aws:
protocol: http
s3:
# NOTE(srwilkers): This gets configured dynamically via endpoint
# lookups
endpoint: null
node:
ingest: ${NODE_INGEST}
master: ${NODE_MASTER}
data: ${NODE_DATA}
name: ${NODE_NAME}
max_local_storage_nodes: 3
path:
data: /usr/share/elasticsearch/data
logs: /usr/share/elasticsearch/logs
snapshots:
enabled: false
# NOTE(srwilkers): The path for the radosgw s3 endpoint gets populated
# dynamically with this value to ensure the bucket name and s3 compatible
# radosgw endpoint/path match
bucket: elasticsearch_bucket
repositories:
logstash:
name: logstash_snapshots
env:
java_opts:
client: "-Xms256m -Xmx256m"
data: "-Xms256m -Xmx256m"
master: "-Xms256m -Xmx256m"
prometheus_elasticsearch_exporter:
es:
all: true
timeout: 20s
templates:
fluent:
template: "logstash-*"
index_patterns: "logstash-*"
settings:
number_of_shards: 1
mappings:
fluent:
properties:
kubernetes:
properties:
container_name:
type: keyword
index: false
docker_id:
type: keyword
index: false
host:
type: keyword
index: false
namespace_name:
type: keyword
index: false
pod_id:
type: keyword
index: false
pod_name:
type: keyword
index: false
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
elasticsearch:
name: elasticsearch
namespace: null
auth:
admin:
username: admin
password: changeme
hosts:
data: elasticsearch-data
default: elasticsearch-logging
discovery: elasticsearch-discovery
public: elasticsearch
host_fqdn_override:
default: null
# NOTE(srwilkers): this chart supports TLS for fqdn over-ridden public
# endpoints using the following format:
# public:
# host: null
# tls:
# crt: null
# key: null
path:
default: null
scheme:
default: http
port:
client:
default: 9200
http:
default: 80
discovery:
default: 9300
prometheus_elasticsearch_exporter:
namespace: null
hosts:
default: elasticsearch-exporter
host_fqdn_override:
default: null
path:
default: /metrics
scheme:
default: 'http'
port:
metrics:
default: 9108
ldap:
hosts:
default: ldap
auth:
admin:
bind: "cn=admin,dc=cluster,dc=local"
password: password
host_fqdn_override:
default: null
path:
default: "/ou=People,dc=cluster,dc=local"
scheme:
default: ldap
port:
ldap:
default: 389
ceph_object_store:
name: radosgw
namespace: null
auth:
elasticsearch:
username: elasticsearch
access_key: "elastic_access_key"
secret_key: "elastic_secret_key"
admin:
username: s3_admin
access_key: "admin_access_key"
secret_key: "admin_secret_key"
hosts:
default: ceph-rgw
public: radosgw
host_fqdn_override:
default: null
path:
default: null
scheme:
default: http
port:
api:
default: 8088
public: 80
monitoring:
prometheus:
enabled: false
elasticsearch_exporter:
scrape: true
network:
elasticsearch:
ingress:
public: true
classes:
namespace: "nginx"
cluster: "nginx-cluster"
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
node_port:
enabled: false
port: 30920
storage:
enabled: true
pvc:
name: pvc-elastic
access_mode: [ "ReadWriteOnce" ]
requests:
storage: 5Gi
storage_class: general
manifests:
configmap_bin_curator: true
configmap_bin_elasticsearch: true
configmap_etc_curator: true
configmap_etc_elasticsearch: true
configmap_etc_templates: true
cron_curator: true
cron_verify_repositories: true
deployment_client: true
deployment_master: true
ingress: true
job_cluster_wait: true
job_elasticsearch_templates: true
job_image_repo_sync: true
job_snapshot_repository: true
job_s3_user: true
job_s3_bucket: true
helm_tests: true
secret_elasticsearch: true
secret_s3: true
monitoring:
prometheus:
configmap_bin_exporter: true
deployment_exporter: true
network_policy_exporter: false
service_exporter: true
network_policy: false
service_data: true
service_discovery: true
service_ingress: true
service_logging: true
statefulset_data: true