From c4acf1ce6af8d8edd9c48e37f596788e1558f46f Mon Sep 17 00:00:00 2001
From: Christian Berendt
Date: Tue, 23 Sep 2014 21:36:16 +0200
Subject: [PATCH] Improve chapter "HA using active/passive - The Pacemaker cluster stack"

Change-Id: I3c8fd184cdfe5da6031a3ee7b1bf78f04a4de16c
---
 doc/high-availability-guide/ch_pacemaker.xml        |  55 +++---
 .../pacemaker/section_install_packages.xml          |  79 ++++----
 .../section_set_basic_cluster_properties.xml        |  92 ++++-----
 .../pacemaker/section_set_up_corosync.xml           | 185 ++++++++----------
 .../pacemaker/section_start_pacemaker.xml           |  62 +++---
 .../pacemaker/section_starting_corosync.xml         |  80 ++++----
 6 files changed, 275 insertions(+), 278 deletions(-)

diff --git a/doc/high-availability-guide/ch_pacemaker.xml b/doc/high-availability-guide/ch_pacemaker.xml
index 7c200e46..49b76ef9 100644
--- a/doc/high-availability-guide/ch_pacemaker.xml
+++ b/doc/high-availability-guide/ch_pacemaker.xml
@@ -1,34 +1,31 @@
-The Pacemaker cluster stack
-OpenStack infrastructure high availability relies on the
-Pacemaker cluster stack, the
-state-of-the-art high availability and load balancing stack for the
-Linux platform. Pacemaker is storage and application-agnostic, and is
-in no way specific to OpenStack.
-Pacemaker relies on the Corosync messaging
-layer for reliable cluster communications. Corosync implements the
-Totem single-ring ordering and membership protocol. It also provides UDP
-and InfiniBand based messaging, quorum, and cluster membership to
-Pacemaker.
-Pacemaker interacts with applications through resource agents (RAs),
-of which it supports over 70 natively. Pacemaker can also easily use
-third-party RAs. An OpenStack high-availability configuration uses
-existing native Pacemaker RAs (such as those managing MySQL
-databases or virtual IP addresses), existing third-party RAs (such as
-for RabbitMQ), and native OpenStack RAs (such as those managing the
-OpenStack Identity and Image Services).
+The Pacemaker cluster stack
+OpenStack infrastructure high availability relies on the
+Pacemaker cluster
+stack, the state-of-the-art high availability and load balancing stack
+for the Linux platform. Pacemaker is storage and application-agnostic,
+and is in no way specific to OpenStack.
+Pacemaker relies on the
+Corosync messaging
+layer for reliable cluster communications. Corosync implements the
+Totem single-ring ordering and membership protocol. It also provides
+UDP and InfiniBand based messaging, quorum, and cluster membership to
+Pacemaker.
+Pacemaker interacts with applications through resource agents
+(RAs), of which it supports over 70 natively. Pacemaker can also
+easily use third-party RAs. An OpenStack high-availability
+configuration uses existing native Pacemaker RAs (such as those
+managing MySQL databases or virtual IP addresses), existing third-party
+RAs (such as for RabbitMQ), and native OpenStack RAs (such as those
+managing the OpenStack Identity and Image Services).
diff --git a/doc/high-availability-guide/pacemaker/section_install_packages.xml b/doc/high-availability-guide/pacemaker/section_install_packages.xml
index f3a7d286..606a22e8 100644
--- a/doc/high-availability-guide/pacemaker/section_install_packages.xml
+++ b/doc/high-availability-guide/pacemaker/section_install_packages.xml
@@ -1,43 +1,44 @@
-
+
- Install packages - On any host that is meant to be part of a Pacemaker cluster, you must -first establish cluster communications through the Corosync messaging -layer. This involves installing the following packages (and their -dependencies, which your package manager will normally install -automatically): - - - pacemaker (Note that the crm shell should be downloaded separately.) - - - - - crmsh - - - - - corosync - - - - - cluster-glue - - - - fence-agents (Fedora only; all other distributions use fencing - agents from cluster-glue) - - - - - resource-agents - - - -
+ Install packages + On any host that is meant to be part of a Pacemaker cluster, you must + first establish cluster communications through the Corosync messaging + layer. This involves installing the following packages (and their + dependencies, which your package manager will normally install + automatically): + + + pacemaker (Note that the crm shell should be + downloaded separately.) + + + + crmsh + + + + + corosync + + + + + cluster-glue + + + + fence-agents (Fedora only; all other + distributions use fencing agents from + cluster-glue) + + + + resource-agents + + + +
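A minimal installation sketch for the packages listed in this section. The exact
commands, repository setup, and package names (crmsh in particular) vary by
distribution, so treat the following as illustrative rather than canonical:

    # Debian/Ubuntu-style systems: crmsh may live in a separate repository
    apt-get install pacemaker crmsh corosync cluster-glue resource-agents

    # Fedora-style systems additionally pull their fencing agents from fence-agents
    yum install pacemaker crmsh corosync cluster-glue fence-agents resource-agents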
diff --git a/doc/high-availability-guide/pacemaker/section_set_basic_cluster_properties.xml b/doc/high-availability-guide/pacemaker/section_set_basic_cluster_properties.xml index 70275a87..2bef42b2 100644 --- a/doc/high-availability-guide/pacemaker/section_set_basic_cluster_properties.xml +++ b/doc/high-availability-guide/pacemaker/section_set_basic_cluster_properties.xml @@ -1,54 +1,54 @@ -
+
- - Set basic cluster properties - - Once your Pacemaker cluster is set up, it is recommended to set a few -basic cluster properties. To do so, start the crm shell and change -into the configuration menu by entering -configure. Alternatively, you may jump straight into the Pacemaker -configuration menu by typing crm configure directly from a shell -prompt. - Then, set the following properties: - property no-quorum-policy="ignore" \ # + Set basic cluster properties + Once your Pacemaker cluster is set up, it is recommended to set a few + basic cluster properties. To do so, start the crm shell + and change into the configuration menu by entering + configure. Alternatively, you may jump straight into + the Pacemaker configuration menu by typing crm configure + directly from a shell prompt. + Then, set the following properties: + property no-quorum-policy="ignore" \ # pe-warn-series-max="1000" \ # pe-input-series-max="1000" \ pe-error-series-max="1000" \ cluster-recheck-interval="5min" # - - - -Setting no-quorum-policy="ignore" is required in 2-node Pacemaker -clusters for the following reason: if quorum enforcement is enabled, -and one of the two nodes fails, then the remaining node can not -establish a majority of quorum votes necessary to run services, and -thus it is unable to take over any resources. In this case, the appropriate -workaround is to ignore loss of quorum in the cluster. This should only only be done in 2-node clusters: do not set this property in -Pacemaker clusters with more than two nodes. Note that a two-node cluster with this setting exposes a risk of split-brain because either half of the cluster, or both, are able to become active in the event that both nodes remain online but lose communication with one another. The preferred configuration is 3 or more nodes per cluster. - - - - -Setting pe-warn-series-max, pe-input-series-max and -pe-error-series-max to 1000 instructs Pacemaker to keep a longer -history of the inputs processed, and errors and warnings generated, by -its Policy Engine. This history is typically useful in case cluster -troubleshooting becomes necessary. - - - - -Pacemaker uses an event-driven approach to cluster state -processing. However, certain Pacemaker actions occur at a configurable -interval, cluster-recheck-interval, which defaults to 15 minutes. It -is usually prudent to reduce this to a shorter interval, such as 5 or -3 minutes. - - - - Once you have made these changes, you may commit the updated -configuration. -
+ + + Setting is required + in 2-node Pacemaker clusters for the following reason: if quorum + enforcement is enabled, and one of the two nodes fails, then the + remaining node can not establish a majority of quorum votes necessary + to run services, and thus it is unable to take over any resources. In + this case, the appropriate workaround is to ignore loss of quorum in + the cluster. This should only only be done in 2-node clusters: do not + set this property in Pacemaker clusters with more than two nodes. Note + that a two-node cluster with this setting exposes a risk of + split-brain because either half of the cluster, or both, are able to + become active in the event that both nodes remain online but lose + communication with one another. The preferred configuration is 3 or + more nodes per cluster. + + + Setting , + and + to 1000 instructs Pacemaker to + keep a longer history of the inputs processed, and errors and warnings + generated, by its Policy Engine. This history is typically useful in + case cluster troubleshooting becomes necessary. + + + Pacemaker uses an event-driven approach to cluster state + processing. However, certain Pacemaker actions occur at a configurable + interval, , which defaults to + 15 minutes. It is usually prudent to reduce this to a shorter interval, + such as 5 or 3 minutes. + + + Once you have made these changes, you may commit + the updated configuration. +
diff --git a/doc/high-availability-guide/pacemaker/section_set_up_corosync.xml b/doc/high-availability-guide/pacemaker/section_set_up_corosync.xml index ef2805a6..560f1494 100644 --- a/doc/high-availability-guide/pacemaker/section_set_up_corosync.xml +++ b/doc/high-availability-guide/pacemaker/section_set_up_corosync.xml @@ -1,21 +1,20 @@ -
+
- Set up Corosync - Besides installing the corosync package, you must also -create a configuration file, stored in -/etc/corosync/corosync.conf. Most distributions ship an example -configuration file (corosync.conf.example) as part of the -documentation bundled with the corosync package. An example Corosync -configuration file is shown below: - - - Corosync configuration file (<filename>corosync.conf</filename>) - - - totem { + Set up Corosync + Besides installing the corosync package, you must + also create a configuration file, stored in + /etc/corosync/corosync.conf. Most distributions ship + an example configuration file (corosync.conf.example) + as part of the documentation bundled with the corosync + package. An example Corosync configuration file is shown below: + + Corosync configuration file (<filename>corosync.conf</filename>) + + totem { version: 2 # Time (in ms) to wait for a token @@ -80,87 +79,77 @@ logging { subsys: AMF debug: off tags: enter|leave|trace1|trace2|trace3|trace4|trace6 - } -} - - - - - -The token value specifies the time, in milliseconds, during -which the Corosync token is expected to be transmitted around the -ring. When this timeout expires, the token is declared lost, and after -token_retransmits_before_loss_const lost tokens the non-responding -processor (cluster node) is declared dead. In other words, -token × token_retransmits_before_loss_const is the maximum -time a node is allowed to not respond to cluster messages before being -considered dead. The default for token is 1000 (1 second), with 4 -allowed retransmits. These defaults are intended to minimize failover -times, but can cause frequent "false alarms" and unintended failovers -in case of short network interruptions. The values used here are -safer, albeit with slightly extended failover times. - - - - -With secauth enabled, Corosync nodes mutually authenticate using -a 128-byte shared secret stored in /etc/corosync/authkey, which may -be generated with the corosync-keygen utility. When using secauth, -cluster communications are also encrypted. - - - - -In Corosync configurations using redundant networking (with more -than one interface), you must select a Redundant Ring Protocol (RRP) -mode other than none. active is the recommended RRP mode. - - - - -There are several things to note about the recommended interface -configuration: - - - - -The ringnumber must differ between all configured interfaces, - starting with 0. - - - - -The bindnetaddr is the network address of the interfaces to bind - to. The example uses two network addresses of /24 IPv4 subnets. - - - - -Multicast groups (mcastaddr) must not be reused across cluster - boundaries. In other words, no two distinct clusters should ever use - the same multicast group. Be sure to select multicast addresses - compliant with RFC 2365, - "Administratively Scoped IP Multicast". - - - - -For firewall configurations, note that Corosync communicates over - UDP only, and uses mcastport (for receives) and mcastport-1 (for - sends). - - - - - - -The service declaration for the pacemaker service may be -placed in the corosync.conf file directly, or in its own separate -file, /etc/corosync/service.d/pacemaker. - - - - Once created, the corosync.conf file (and the authkey file if the -secauth option is enabled) must be synchronized across all cluster -nodes. -
+ }} + + + + + The value specifies the time, in + milliseconds, during which the Corosync token is expected to be + transmitted around the ring. When this timeout expires, the token is + declared lost, and after + lost tokens the non-responding processor (cluster node) is declared + dead. In other words, + × + is the maximum time a node is allowed to not respond to cluster + messages before being considered dead. The default for + is 1000 (1 second), with 4 allowed + retransmits. These defaults are intended to minimize failover times, + but can cause frequent "false alarms" and unintended failovers in case + of short network interruptions. The values used here are safer, albeit + with slightly extended failover times. + + + With enabled, Corosync nodes mutually + authenticate using a 128-byte shared secret stored in + /etc/corosync/authkey, which may be generated with + the corosync-keygen utility. When using + , cluster communications are also + encrypted. + + + In Corosync configurations using redundant networking (with more + than one ), you must select a Redundant + Ring Protocol (RRP) mode other than none. + active is the recommended RRP mode. + + + There are several things to note about the recommended interface + configuration: + + + The must differ between all + configured interfaces, starting with 0. + + + The is the network address of + the interfaces to bind to. The example uses two network addresses + of /24 IPv4 subnets. + + + Multicast groups () must not be + reused across cluster boundaries. In other words, no two distinct + clusters should ever use the same multicast group. Be sure to + select multicast addresses compliant with + RFC 2365, + "Administratively Scoped IP Multicast". + + + For firewall configurations, note that Corosync communicates + over UDP only, and uses mcastport (for receives) + and mcastport - 1 (for sends). + + + + + The service declaration for the + pacemaker service may be placed in the + corosync.conf file directly, or in its own + separate file, + /etc/corosync/service.d/pacemaker. + + + Once created, the corosync.conf file (and the + authkey file if the option + is enabled) must be synchronized across all cluster nodes. +
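A brief sketch of that synchronization step, assuming a second node reachable
under the hypothetical name node2 and scp as the copy mechanism; any method
that preserves the restrictive permissions on authkey will do:

    # Generate the shared secret once, on a single node
    corosync-keygen

    # Copy the configuration and key to the other cluster node(s),
    # preserving file modes
    scp -p /etc/corosync/corosync.conf /etc/corosync/authkey node2:/etc/corosync/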
diff --git a/doc/high-availability-guide/pacemaker/section_start_pacemaker.xml b/doc/high-availability-guide/pacemaker/section_start_pacemaker.xml index b6c603e4..42e36474 100644 --- a/doc/high-availability-guide/pacemaker/section_start_pacemaker.xml +++ b/doc/high-availability-guide/pacemaker/section_start_pacemaker.xml @@ -1,34 +1,40 @@ -
+
- Start Pacemaker - Once the Corosync services have been started and you have established -that the cluster is communicating properly, it is safe to start -pacemakerd, the Pacemaker master control process: - - - /etc/init.d/pacemaker start (LSB) - - - - service pacemaker start (LSB, alternate) - - - - start pacemaker (upstart) - - - - systemctl start pacemaker (systemd) - - - - Once Pacemaker services have started, Pacemaker will create a default -empty cluster configuration with no resources. You may observe -Pacemaker’s status with the crm_mon utility: - ============ + Start Pacemaker + Once the Corosync services have been started and you have established + that the cluster is communicating properly, it is safe to start + pacemakerd, the Pacemaker + master control process: + + + + /etc/init.d/pacemaker start (LSB) + + + + + service pacemaker start (LSB, alternate) + + + + + start pacemaker (upstart) + + + + + systemctl start pacemaker (systemd) + + + + Once Pacemaker services have started, Pacemaker will create a default + empty cluster configuration with no resources. You may observe + Pacemaker's status with the crm_mon utility: + ============ Last updated: Sun Oct 7 21:07:52 2012 Last change: Sun Oct 7 20:46:00 2012 via cibadmin on node2 Stack: openais @@ -39,4 +45,4 @@ Version: 1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c ============ Online: [ node2 node1 ] -
+
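If you only want a one-shot snapshot rather than the continuously updating
display, crm_mon can print the status once and exit. A small sketch follows;
the node names in the output will be your own:

    # Print the cluster status once and return to the shell
    crm_mon -1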
diff --git a/doc/high-availability-guide/pacemaker/section_starting_corosync.xml b/doc/high-availability-guide/pacemaker/section_starting_corosync.xml index 3edb131b..da792ced 100644 --- a/doc/high-availability-guide/pacemaker/section_starting_corosync.xml +++ b/doc/high-availability-guide/pacemaker/section_starting_corosync.xml @@ -1,38 +1,42 @@ -
+
- - Starting Corosync - - Corosync is started as a regular system service. Depending on your -distribution, it may ship with an LSB init script, an -upstart job, or a systemd unit file. Either way, the service is -usually named corosync: - - - /etc/init.d/corosync start (LSB) - - - - service corosync start (LSB, alternate) - - - - start corosync (upstart) - - - - systemctl start corosync (systemd) - - - - You can now check the Corosync connectivity with two tools. - The corosync-cfgtool utility, when invoked with the -s option, -gives a summary of the health of the communication rings: - # corosync-cfgtool -s -Printing ring status. + Starting Corosync + Corosync is started as a regular system service. Depending on your + distribution, it may ship with an LSB init script, an + upstart job, or a systemd unit file. Either way, the service is + usually named corosync: + + + + /etc/init.d/corosync start (LSB) + + + + + service corosync start (LSB, alternate) + + + + + start corosync (upstart) + + + + + systemctl start corosync (systemd) + + + + You can now check the Corosync connectivity with two tools. + The corosync-cfgtool utility, when invoked with + the option, gives a summary of the health of the + communication rings: + # corosync-cfgtool -s + Printing ring status. Local node ID 435324542 RING ID 0 id = 192.168.42.82 @@ -40,15 +44,15 @@ RING ID 0 RING ID 1 id = 10.0.42.100 status = ring 1 active with no faults - The corosync-objctl utility can be used to dump the Corosync cluster -member list: - # corosync-objctl runtime.totem.pg.mrp.srp.members -runtime.totem.pg.mrp.srp.435324542.ip=r(0) ip(192.168.42.82) r(1) ip(10.0.42.100) + The corosync-objctl utility can be used to dump the + Corosync cluster member list: + # corosync-objctl runtime.totem.pg.mrp.srp.members + runtime.totem.pg.mrp.srp.435324542.ip=r(0) ip(192.168.42.82) r(1) ip(10.0.42.100) runtime.totem.pg.mrp.srp.435324542.join_count=1 runtime.totem.pg.mrp.srp.435324542.status=joined runtime.totem.pg.mrp.srp.983895584.ip=r(0) ip(192.168.42.87) r(1) ip(10.0.42.254) runtime.totem.pg.mrp.srp.983895584.join_count=1 runtime.totem.pg.mrp.srp.983895584.status=joined - You should see a status=joined entry for each of your constituent -cluster nodes. -
+ You should see a status=joined entry for each of + your constituent cluster nodes. +
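Because each node maintains its own view of the rings and of the cluster
membership, it is worth repeating these checks on every cluster member. A
minimal sketch, assuming the hypothetical hostnames node1 and node2 and
working ssh access between them:

    # Run the ring status check on every node in turn
    for node in node1 node2; do
        echo "=== $node ==="
        ssh "$node" corosync-cfgtool -s
    done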