RabbitMQ: use DNS for cluster discovery, not etcd

Implements: blueprint rabbitmq-dns-discovery

Some useful things to note:

1. This uses a StatefulSet instead of a Deployment.  The reason for this
   is that when RabbitMQ uses DNS for peer discovery, the first thing it
   does when trying to join a node is attempt a reverse-dns lookup.
   This reverse lookup works when using a StatefulSet, but not a
   Deployment.
2. The RabbitMQ configuration was updated to use the new sysctl-style
   format.  It seems that the new format is required to configure the
   new autoclustering features.  Additionally, I found that this
   generates much clearer error messages than the straight Erlang format.
3. I removed the `is-node-properly-clustered` test in the liveness and
   readiness probes.  This probe isn't directly supported in 3.7.0,
   and it wasn't clear that a clustering check was appropriate for each
   node.

Change-Id: Ieefbb2205bd77fbac04abcd051fb06fce62e8d97
This commit is contained in:
Mark Burnett 2017-05-04 09:54:26 -05:00
parent bab04be137
commit 0982a823e1
10 changed files with 65 additions and 115 deletions

View File

@ -93,16 +93,3 @@ is-node-healthy() {
echo "$result" | prepend-log-prefix
return 1
}
# Ask the autocluster plugin whether this node is consistent with the cluster.
#
# Runs `autocluster:cluster_health_check()` through `rabbitmqctl eval` and
# classifies the combined stdout/stderr output by its leading token.
#
# Returns:
#   0 - output starts with "SUCCESS:", or the output is not recognised
#       (the output is logged and the node is treated as healthy)
#   1 - output starts with "FAILURE:" (the output is logged)
is-node-properly-clustered() {
# 2>&1 so that error text from rabbitmqctl is captured in $result as well.
result="$(rabbitmqctl eval 'autocluster:cluster_health_check().' 2>&1)"
# Both patterns are anchored with ^, so only the start of the output is matched.
if [[ $result =~ ^SUCCESS: ]]; then
return 0
elif [[ $result =~ ^FAILURE: ]]; then
echo "$result" | prepend-log-prefix
return 1
fi
# Neither marker matched: deliberately fail open rather than fail the probe
# on output this function does not understand.
log-it "Unexpected health-check output, giving the node the benefit of the doubt"
echo "$result" | prepend-log-prefix
return 0
}

View File

@ -45,10 +45,6 @@ main() {
log-it "Node is unhealthy"
return 1
fi
if ! is-node-properly-clustered; then
log-it "Found clustering inconsistency, giving up"
return 1
fi
return 0
;;
stale) # node has started long ago - it should be either ready or dead
@ -56,10 +52,6 @@ main() {
log-it "Long-running node become unhealthy"
return 1
fi
if ! is-node-properly-clustered; then
echo "Long-running node became inconsistent with the rest of the cluster"
return 1
fi
return 0
;;
*)

View File

@ -32,13 +32,6 @@ main() {
log-it "Node is unhealthy"
return 1
fi
{{ if gt (.Values.replicas | int) 1 -}}
if ! is-node-properly-clustered; then
log-it "Node is inconsistent with the rest of the cluster"
return 1
fi
{{- end }}
return 0
}

View File

@ -27,6 +27,5 @@ data:
{{ tuple "etc/_erlang.cookie.tpl" . | include "helm-toolkit.template" | indent 4 }}
rabbitmq-env.conf: |
{{ tuple "etc/_rabbitmq-env.conf.tpl" . | include "helm-toolkit.template" | indent 4 }}
rabbitmq.config: |
{{ tuple "etc/_rabbitmq.config.tpl" . | include "helm-toolkit.template" | indent 4 }}
rabbitmq.conf: |
{{ tuple "etc/_rabbitmq.conf.tpl" . | include "helm-toolkit.template" | indent 4 }}

View File

@ -12,10 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
CONFIG_FILE=/etc/rabbitmq/rabbitmq.conf
RABBITMQ_LOGS=-
RABBITMQ_SASL_LOGS=-
AUTOCLUSTER_TYPE=etcd
AUTOCLUSTER_DELAY={{ .Values.autocluster.delay }}
RABBITMQ_USE_LONGNAME=true
AUTOCLUSTER_LOG_LEVEL={{ .Values.autocluster.log_level }}
NODENAME="rabbit@${RABBITMQ_POD_IP}"
NODENAME="rabbit@$(hostname -f)"

View File

@ -0,0 +1,29 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# RabbitMQ settings in the sysctl-style (key = value) format, rendered by Helm.

# TCP listener bound to all interfaces on the chart's public port.
listeners.tcp.other_ip = 0.0.0.0:{{ .Values.network.port.public }}
# Default credentials are taken from the chart values.
default_user = {{ .Values.auth.default_user }}
default_pass = {{ .Values.auth.default_pass }}
# NOTE(review): presumably lifts the loopback-only restriction for the
# "guest" user - confirm against the RabbitMQ access-control docs.
loopback_users.guest = false
# Peer discovery uses the DNS backend; the hostname queried is the
# rabbitmq-discovery service in the release namespace.
# NOTE(review): the cluster domain "cluster.local" is hard-coded here -
# confirm it matches the target cluster's DNS domain.
autocluster.peer_discovery_backend = rabbit_peer_discovery_dns
autocluster.dns.hostname = rabbitmq-discovery.{{ .Release.Namespace }}.svc.cluster.local
autocluster.node_type = disc
# NOTE(review): interval is presumably in milliseconds - confirm.
cluster_keepalive_interval = 30000
cluster_partition_handling = ignore
queue_master_locator = random

View File

@ -1,41 +0,0 @@
% Copyright 2017 The Openstack-Helm Authors.
%
% Licensed under the Apache License, Version 2.0 (the "License");
% you may not use this file except in compliance with the License.
% You may obtain a copy of the License at
%
% http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS,
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
% See the License for the specific language governing permissions and
% limitations under the License.
[
{rabbit, [
{dummy_param_without_comma, true}
,{tcp_listeners, [
{"0.0.0.0", {{ .Values.network.port.public }} }
]}
,{default_user, <<"{{ .Values.auth.default_user }}">>}
,{default_pass, <<"{{ .Values.auth.default_pass }}">>}
,{loopback_users, []}
,{cluster_partition_handling, ignore}
,{queue_master_locator, <<"random">>}
]}
,{autocluster, [
{dummy_param_without_comma, true}
,{backend, etcd}
,{autocluster_log_level,{{ .Values.autocluster.log_level }}}
,{autocluster_failure, stop}
,{cleanup_interval, 30}
,{cluster_cleanup, true}
,{cleanup_warn_only, false}
,{etcd_node_ttl, 15}
,{etcd_scheme, http}
,{etcd_host, {{ .Values.endpoints.etcd.hosts.default }}}
,{etcd_port, {{ .Values.endpoints.etcd.port }}}
]}
].
% EOF

View File

@ -0,0 +1,26 @@
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Service "rabbitmq-discovery": selects the pods labelled app: rabbitmq and
# exposes the chart's public port.
apiVersion: v1
kind: Service
metadata:
name: rabbitmq-discovery
annotations:
# NOTE(review): presumably keeps pods that are not yet ready resolvable via
# this service so peers can find each other during startup - confirm.
service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
spec:
# clusterIP: None makes this a headless service (no virtual IP is allocated).
clusterIP: None
selector:
app: rabbitmq
ports:
- port: {{.Values.network.port.public}}

View File

@ -12,32 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
{{- $envAll := . }}
{{- $dependencies := .Values.dependencies }}
kind: Deployment
apiVersion: extensions/v1beta1
kind: StatefulSet
apiVersion: apps/v1beta1
metadata:
name: rabbitmq
spec:
replicas: {{ .Values.replicas }}
revisionHistoryLimit: {{ .Values.upgrades.revision_history }}
strategy:
type: {{ .Values.upgrades.pod_replacement_strategy }}
{{ if eq .Values.upgrades.pod_replacement_strategy "RollingUpdate" }}
rollingUpdate:
maxUnavailable: {{ .Values.upgrades.rolling_update.max_unavailable }}
maxSurge: {{ .Values.upgrades.rolling_update.max_surge }}
{{ end }}
serviceName: rabbitmq-discovery
template:
metadata:
labels:
app: rabbitmq
annotations:
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.hash" }}
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.hash" }}
pod.beta.kubernetes.io/init-containers: '[
{{ tuple $envAll $dependencies "[]" | include "helm-toolkit.kubernetes_entrypoint_init_container" | indent 10 }}
]'
# TODO: this needs to be moved to common.
scheduler.alpha.kubernetes.io/affinity: >
{
@ -82,11 +68,6 @@ spec:
command:
- bash
- /scripts/start.sh
env:
- name: RABBITMQ_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
readinessProbe:
timeoutSeconds: {{ .Values.probes_timeout }}
exec:
@ -115,5 +96,5 @@ spec:
mountPath: /etc/rabbitmq/rabbitmq-env.conf
subPath: rabbitmq-env.conf
- name: rabbitmq-etc
mountPath: /etc/rabbitmq/rabbitmq.config
subPath: rabbitmq.config
mountPath: /etc/rabbitmq/rabbitmq.conf
subPath: rabbitmq.conf

View File

@ -51,28 +51,14 @@ network:
management: '15672'
images:
rabbitmq: "quay.io/attcomdev/fuel-mcp-rabbitmq:ocata-unstable"
dep_check: "quay.io/stackanetes/kubernetes-entrypoint:v0.1.1"
rabbitmq: "quay.io/attcomdev/rabbitmq:3.7.0-pre-14"
pull_policy: "IfNotPresent"
enabled_plugins:
- autocluster
erlang_cookie: openstack-cookie
endpoints:
etcd:
hosts:
default: etcd
port: 2379
autocluster:
log_level: info
delay: 15
probes_delay: 180
probes_timeout: 10
dependencies:
service:
- etcd