[ha-guide] Migrate HA Guide into openstack-manuals
Discussion at mailing list:
http://lists.openstack.org/pipermail/openstack-docs/2016-May/008532.html

Change-Id: Icc6761cdda2ca820447153fa7ec046e22cc98129
This commit is contained in: parent 99da0042b4, commit 2809ad9edb
@@ -33,6 +33,7 @@ declare -A SPECIAL_BOOKS=(
     ["common"]="RST"
     ["admin-guide"]="RST"
     ["arch-design"]="RST"
+    ["ha-guide"]="RST"
     ["image-guide"]="RST"
     ["install-guide"]="RST"
     ["networking-guide"]="RST"
doc/ha-guide/setup.cfg (new file, 30 lines)
@@ -0,0 +1,30 @@
|
||||
[metadata]
|
||||
name = openstackhaguide
|
||||
summary = OpenStack High Availability Guide
|
||||
author = OpenStack
|
||||
author-email = openstack-docs@lists.openstack.org
|
||||
home-page = http://docs.openstack.org/
|
||||
classifier =
|
||||
Environment :: OpenStack
|
||||
Intended Audience :: Information Technology
|
||||
Intended Audience :: System Administrators
|
||||
License :: OSI Approved :: Apache Software License
|
||||
Operating System :: POSIX :: Linux
|
||||
Topic :: Documentation
|
||||
|
||||
[global]
|
||||
setup-hooks =
|
||||
pbr.hooks.setup_hook
|
||||
|
||||
[files]
|
||||
|
||||
[build_sphinx]
|
||||
all_files = 1
|
||||
build-dir = build
|
||||
source-dir = source
|
||||
|
||||
[wheel]
|
||||
universal = 1
|
||||
|
||||
[pbr]
|
||||
warnerrors = True
|
doc/ha-guide/setup.py (new file, 30 lines)
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2013 Hewlett-Packard Development Company, L.P.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# THIS FILE IS MANAGED BY THE GLOBAL REQUIREMENTS REPO - DO NOT EDIT
|
||||
import setuptools
|
||||
|
||||
# In python < 2.7.4, a lazy loading of package `pbr` will break
|
||||
# setuptools if some other modules registered functions in `atexit`.
|
||||
# solution from: http://bugs.python.org/issue15881#msg170215
|
||||
try:
|
||||
import multiprocessing # noqa
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
setuptools.setup(
|
||||
setup_requires=['pbr'],
|
||||
pbr=True)
|
doc/ha-guide/source/common (new symbolic link)
@@ -0,0 +1 @@
|
||||
../../common
|
doc/ha-guide/source/compute-node-ha-api.rst (new file, 12 lines)
@@ -0,0 +1,12 @@
|
||||
|
||||
============================================
|
||||
Configure high availability on compute nodes
|
||||
============================================
|
||||
|
||||
The `Installation Guide
|
||||
<http://docs.openstack.org/liberty/#install-guides>`_
|
||||
gives instructions for installing multiple compute nodes.
|
||||
To make them highly available,
|
||||
you must configure the environment
|
||||
to include multiple instances of the API
|
||||
and other services.
|
doc/ha-guide/source/compute-node-ha.rst (new file, 10 lines)
@@ -0,0 +1,10 @@
|
||||
|
||||
==================================================
|
||||
Configuring the compute node for high availability
|
||||
==================================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
compute-node-ha-api.rst
|
||||
|
doc/ha-guide/source/conf.py (new file, 290 lines)
@@ -0,0 +1,290 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This file is execfile()d with the current directory set to its
|
||||
# containing dir.
|
||||
#
|
||||
# Note that not all possible configuration values are present in this
|
||||
# autogenerated file.
|
||||
#
|
||||
# All configuration values have a default; values that are commented out
|
||||
# serve to show the default.
|
||||
|
||||
import os
|
||||
# import sys
|
||||
|
||||
import openstackdocstheme
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
# sys.path.insert(0, os.path.abspath('.'))
|
||||
|
||||
# -- General configuration ------------------------------------------------
|
||||
|
||||
# If your documentation needs a minimal Sphinx version, state it here.
|
||||
# needs_sphinx = '1.0'
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = []
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
# templates_path = ['_templates']
|
||||
|
||||
# The suffix of source filenames.
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The encoding of source files.
|
||||
# source_encoding = 'utf-8-sig'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'High Availability Guide'
|
||||
bug_tag = u'ha-guide'
|
||||
copyright = u'2015, OpenStack contributors'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '0.0.1'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '0.0.1'
|
||||
|
||||
# A few variables have to be set for the log-a-bug feature.
|
||||
# giturl: The location of conf.py on Git. Must be set manually.
|
||||
# gitsha: The SHA checksum of the latest commit. Automatically extracted from git log.
|
||||
# bug_tag: Tag for categorizing the bug. Must be set manually.
|
||||
# These variables are passed to the logabug code via html_context.
|
||||
giturl = u'http://git.openstack.org/cgit/openstack/openstack-manuals/tree/doc/ha-guide/source'
|
||||
git_cmd = "/usr/bin/git log | head -n1 | cut -f2 -d' '"
|
||||
gitsha = os.popen(git_cmd).read().strip('\n')
|
||||
html_context = {"gitsha": gitsha, "bug_tag": bug_tag,
|
||||
"giturl": giturl}
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
# language = None
|
||||
|
||||
# There are two options for replacing |today|: either, you set today to some
|
||||
# non-false value, then it is used:
|
||||
# today = ''
|
||||
# Else, today_fmt is used as the format for a strftime call.
|
||||
# today_fmt = '%B %d, %Y'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
exclude_patterns = ['common/cli*', 'common/nova*',
|
||||
'common/get_started*', 'common/dashboard*']
|
||||
|
||||
# The reST default role (used for this markup: `text`) to use for all
|
||||
# documents.
|
||||
# default_role = None
|
||||
|
||||
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||
# add_function_parentheses = True
|
||||
|
||||
# If true, the current module name will be prepended to all description
|
||||
# unit titles (such as .. function::).
|
||||
# add_module_names = True
|
||||
|
||||
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||
# output. They are ignored by default.
|
||||
# show_authors = False
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'sphinx'
|
||||
|
||||
# A list of ignored prefixes for module index sorting.
|
||||
# modindex_common_prefix = []
|
||||
|
||||
# If true, keep warnings as "system message" paragraphs in the built documents.
|
||||
# keep_warnings = False
|
||||
|
||||
|
||||
# -- Options for HTML output ----------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
html_theme = 'openstackdocs'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
# html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom themes here, relative to this directory.
|
||||
html_theme_path = [openstackdocstheme.get_html_theme_path()]
|
||||
|
||||
# The name for this set of Sphinx documents. If None, it defaults to
|
||||
# "<project> v<release> documentation".
|
||||
# html_title = None
|
||||
|
||||
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||
# html_short_title = None
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top
|
||||
# of the sidebar.
|
||||
# html_logo = None
|
||||
|
||||
# The name of an image file (within the static path) to use as favicon of the
|
||||
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||
# pixels large.
|
||||
# html_favicon = None
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
# html_static_path = []
|
||||
|
||||
# Add any extra paths that contain custom files (such as robots.txt or
|
||||
# .htaccess) here, relative to this directory. These files are copied
|
||||
# directly to the root of the documentation.
|
||||
# html_extra_path = []
|
||||
|
||||
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||
# using the given strftime format.
|
||||
# So that we can enable "log-a-bug" links from each output HTML page, this
|
||||
# variable must be set to a format that includes year, month, day, hours and
|
||||
# minutes.
|
||||
html_last_updated_fmt = '%Y-%m-%d %H:%M'
|
||||
|
||||
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||
# typographically correct entities.
|
||||
# html_use_smartypants = True
|
||||
|
||||
# Custom sidebar templates, maps document names to template names.
|
||||
# html_sidebars = {}
|
||||
|
||||
# Additional templates that should be rendered to pages, maps page names to
|
||||
# template names.
|
||||
# html_additional_pages = {}
|
||||
|
||||
# If false, no module index is generated.
|
||||
# html_domain_indices = True
|
||||
|
||||
# If false, no index is generated.
|
||||
html_use_index = False
|
||||
|
||||
# If true, the index is split into individual pages for each letter.
|
||||
# html_split_index = False
|
||||
|
||||
# If true, links to the reST sources are added to the pages.
|
||||
html_show_sourcelink = False
|
||||
|
||||
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||
# html_show_sphinx = True
|
||||
|
||||
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||
# html_show_copyright = True
|
||||
|
||||
# If true, an OpenSearch description file will be output, and all pages will
|
||||
# contain a <link> tag referring to it. The value of this option must be the
|
||||
# base URL from which the finished HTML is served.
|
||||
# html_use_opensearch = ''
|
||||
|
||||
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||
# html_file_suffix = None
|
||||
|
||||
# Output file base name for HTML help builder.
|
||||
htmlhelp_basename = 'ha-guide'
|
||||
|
||||
# If true, publish source files
|
||||
html_copy_source = False
|
||||
|
||||
# -- Options for LaTeX output ---------------------------------------------
|
||||
|
||||
latex_elements = {
|
||||
# The paper size ('letterpaper' or 'a4paper').
|
||||
# 'papersize': 'letterpaper',
|
||||
|
||||
# The font size ('10pt', '11pt' or '12pt').
|
||||
# 'pointsize': '10pt',
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
# 'preamble': '',
|
||||
}
|
||||
|
||||
# Grouping the document tree into LaTeX files. List of tuples
|
||||
# (source start file, target name, title,
|
||||
# author, documentclass [howto, manual, or own class]).
|
||||
latex_documents = [
|
||||
('index', 'HAGuide.tex', u'High Availability Guide',
|
||||
u'OpenStack contributors', 'manual'),
|
||||
]
|
||||
|
||||
# The name of an image file (relative to this directory) to place at the top of
|
||||
# the title page.
|
||||
# latex_logo = None
|
||||
|
||||
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||
# not chapters.
|
||||
# latex_use_parts = False
|
||||
|
||||
# If true, show page references after internal links.
|
||||
# latex_show_pagerefs = False
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
# latex_show_urls = False
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
# latex_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
# latex_domain_indices = True
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
('index', 'haguide', u'High Availability Guide',
|
||||
[u'OpenStack contributors'], 1)
|
||||
]
|
||||
|
||||
# If true, show URL addresses after external links.
|
||||
# man_show_urls = False
|
||||
|
||||
|
||||
# -- Options for Texinfo output -------------------------------------------
|
||||
|
||||
# Grouping the document tree into Texinfo files. List of tuples
|
||||
# (source start file, target name, title, author,
|
||||
# dir menu entry, description, category)
|
||||
texinfo_documents = [
|
||||
('index', 'HAGuide', u'High Availability Guide',
|
||||
u'OpenStack contributors', 'HAGuide',
|
||||
'This guide shows OpenStack operators and deployers how to configure '
'OpenStack to be robust and fault-tolerant.', 'Miscellaneous'),
|
||||
]
|
||||
|
||||
# Documents to append as an appendix to all manuals.
|
||||
# texinfo_appendices = []
|
||||
|
||||
# If false, no module index is generated.
|
||||
# texinfo_domain_indices = True
|
||||
|
||||
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||
# texinfo_show_urls = 'footnote'
|
||||
|
||||
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
||||
# texinfo_no_detailmenu = False
|
||||
|
||||
# -- Options for Internationalization output ------------------------------
|
||||
locale_dirs = ['locale/']
|
doc/ha-guide/source/controller-ha-galera-config.rst (new file, 396 lines)
@@ -0,0 +1,396 @@
|
||||
Configuration
|
||||
==============
|
||||
|
||||
Before you launch Galera Cluster, you need to configure the server
|
||||
and the database to operate as part of the cluster.
|
||||
|
||||
Configuring the server
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Certain services running on the underlying operating system of your
|
||||
OpenStack database may block Galera Cluster from normal operation
|
||||
or prevent ``mysqld`` from achieving network connectivity with the cluster.
|
||||
|
||||
|
||||
Firewall
|
||||
---------
|
||||
|
||||
Galera Cluster requires that you open four ports to network traffic:
|
||||
|
||||
- On ``3306``, Galera Cluster uses TCP for database client connections
  and State Snapshot Transfer methods that require the client
  (that is, ``mysqldump``).
- On ``4567``, Galera Cluster uses TCP for replication traffic. Multicast
  replication uses both TCP and UDP on this port.
- On ``4568``, Galera Cluster uses TCP for Incremental State Transfers.
- On ``4444``, Galera Cluster uses TCP for all other State Snapshot Transfer
  methods.
|
||||
|
||||
.. seealso:: For more information on firewalls, see `Firewalls and default ports
|
||||
<http://docs.openstack.org/liberty/config-reference/content/firewalls-default-ports.html>`_, in the Configuration Reference.
|
||||
|
||||
|
||||
|
||||
``iptables``
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
For many Linux distributions, you can configure the firewall using
|
||||
the ``iptables`` utility. To do so, complete the following steps:
|
||||
|
||||
#. For each cluster node, run the following commands, replacing
|
||||
``NODE-IP-ADDRESS`` with the IP address of the cluster node
|
||||
you want to open the firewall to:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# iptables --append INPUT --in-interface eth0 \
  --protocol tcp --match tcp --dport 3306 \
  --source NODE-IP-ADDRESS --jump ACCEPT
# iptables --append INPUT --in-interface eth0 \
  --protocol tcp --match tcp --dport 4567 \
  --source NODE-IP-ADDRESS --jump ACCEPT
# iptables --append INPUT --in-interface eth0 \
  --protocol tcp --match tcp --dport 4568 \
  --source NODE-IP-ADDRESS --jump ACCEPT
# iptables --append INPUT --in-interface eth0 \
  --protocol tcp --match tcp --dport 4444 \
  --source NODE-IP-ADDRESS --jump ACCEPT
|
||||
|
||||
In the event that you also want to configure multicast replication,
|
||||
run this command as well:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# iptables --append INPUT --in-interface eth0 \
|
||||
--protocol udp --match udp --dport 4567 \
|
||||
--source NODE-IP-ADDRESS --jump ACCEPT
|
||||
|
||||
|
||||
#. Make the changes persistent. For servers that use ``init``, use
|
||||
the :command:`save` command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# service iptables save
|
||||
|
||||
For servers that use ``systemd``, you need to save the current packet
|
||||
filtering to the path of the file that ``iptables`` reads when it starts.
|
||||
This path can vary by distribution, but common locations are in the
|
||||
``/etc`` directory, such as:
|
||||
|
||||
- ``/etc/sysconfig/iptables``
|
||||
- ``/etc/iptables/iptables.rules``
|
||||
|
||||
When you find the correct path, run the :command:`iptables-save` command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# iptables-save > /etc/sysconfig/iptables
|
||||
|
||||
With the firewall configuration saved, ``iptables`` permits Galera Cluster
traffic whenever your OpenStack database starts.
|
||||
|
||||
``firewall-cmd``
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
For many Linux distributions, you can configure the firewall using the
|
||||
``firewall-cmd`` utility for FirewallD. To do so, complete the following
|
||||
steps on each cluster node:
|
||||
|
||||
#. Add the Galera Cluster service:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# firewall-cmd --add-service=mysql
|
||||
|
||||
#. For each instance of OpenStack database in your cluster, run the
|
||||
following commands, replacing ``NODE-IP-ADDRESS`` with the IP address
|
||||
of the cluster node you want to open the firewall to:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# firewall-cmd --add-port=3306/tcp
|
||||
# firewall-cmd --add-port=4567/tcp
|
||||
# firewall-cmd --add-port=4568/tcp
|
||||
# firewall-cmd --add-port=4444/tcp
|
||||
|
||||
In the event that you also want to configure multicast replication,
|
||||
run this command as well:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# firewall-cmd --add-port=4567/udp
|
||||
|
||||
#. To make this configuration persistent, repeat the above commands
|
||||
with the :option:`--permanent` option.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# firewall-cmd --add-service=mysql --permanent
|
||||
# firewall-cmd --add-port=3306/tcp --permanent
|
||||
# firewall-cmd --add-port=4567/tcp --permanent
|
||||
# firewall-cmd --add-port=4568/tcp --permanent
|
||||
# firewall-cmd --add-port=4444/tcp --permanent
|
||||
# firewall-cmd --add-port=4567/udp --permanent
|
||||
|
||||
|
||||
With the firewall configuration saved, FirewallD permits Galera Cluster
traffic whenever your OpenStack database starts.
|
||||
|
||||
SELinux
|
||||
--------
|
||||
|
||||
Security-Enhanced Linux is a kernel module for improving security on Linux
|
||||
operating systems. It is commonly enabled and configured by default on
|
||||
Red Hat-based distributions. In the context of Galera Cluster, systems with
|
||||
SELinux may block the database service, keep it from starting or prevent it
|
||||
from establishing network connections with the cluster.
|
||||
|
||||
To configure SELinux to permit Galera Cluster to operate, complete
|
||||
the following steps on each cluster node:
|
||||
|
||||
#. Using the ``semanage`` utility, open the relevant ports:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# semanage port -a -t mysqld_port_t -p tcp 3306
|
||||
# semanage port -a -t mysqld_port_t -p tcp 4567
|
||||
# semanage port -a -t mysqld_port_t -p tcp 4568
|
||||
# semanage port -a -t mysqld_port_t -p tcp 4444
|
||||
|
||||
In the event that you use multicast replication, you also need to
|
||||
open ``4567`` to UDP traffic:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# semanage port -a -t mysqld_port_t -p udp 4567
|
||||
|
||||
#. Set SELinux to allow the database server to run:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# semanage permissive -a mysqld_t
|
||||
|
||||
With these options set, SELinux now permits Galera Cluster to operate.
|
||||
|
||||
.. note:: Bear in mind, leaving SELinux in permissive mode is not a good
|
||||
security practice. Over the longer term, you need to develop a
|
||||
security policy for Galera Cluster and then switch SELinux back
|
||||
into enforcing mode.
|
||||
|
||||
For more information on configuring SELinux to work with
|
||||
Galera Cluster, see the `Documentation
|
||||
<http://galeracluster.com/documentation-webpages/selinux.html>`_.
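
As a minimal sketch of that longer-term approach (assuming you have since
built and loaded an SELinux policy module that covers the database server),
you can remove the permissive domain again so that ``mysqld_t`` returns to
enforcing mode:

.. code-block:: console

   # semanage permissive -d mysqld_t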
|
||||
|
||||
|
||||
AppArmor
|
||||
---------
|
||||
|
||||
Application Armor is a kernel module for improving security on Linux
|
||||
operating systems. It is developed by Canonical and commonly used on
|
||||
Ubuntu-based distributions. In the context of Galera Cluster, systems
|
||||
with AppArmor may block the database service from operating normally.
|
||||
|
||||
To configure AppArmor to work with Galera Cluster, complete the
|
||||
following steps on each cluster node:
|
||||
|
||||
#. Create a symbolic link for the database server in the ``disable`` directory:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
|
||||
|
||||
#. Restart AppArmor. For servers that use ``init``, run the following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# service apparmor restart
|
||||
|
||||
For servers that use ``systemd``, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# systemctl restart apparmor
|
||||
|
||||
AppArmor now permits Galera Cluster to operate.
|
||||
|
||||
|
||||
Database configuration
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
MySQL databases, including MariaDB and Percona XtraDB, manage their
|
||||
configurations using a ``my.cnf`` file, which is typically located in the
|
||||
``/etc`` directory. Configuration options available in these databases are
|
||||
also available in Galera Cluster, with some restrictions and several
|
||||
additions.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
[mysqld]
|
||||
datadir=/var/lib/mysql
|
||||
socket=/var/lib/mysql/mysql.sock
|
||||
user=mysql
|
||||
binlog_format=ROW
|
||||
bind-address=0.0.0.0
|
||||
|
||||
# InnoDB Configuration
|
||||
default_storage_engine=innodb
|
||||
innodb_autoinc_lock_mode=2
|
||||
innodb_flush_log_at_trx_commit=0
|
||||
innodb_buffer_pool_size=122M
|
||||
|
||||
# Galera Cluster Configuration
|
||||
wsrep_provider=/usr/lib/libgalera_smm.so
|
||||
wsrep_provider_options="pc.recovery=TRUE;gcache.size=300M"
|
||||
wsrep_cluster_name="my_example_cluster"
|
||||
wsrep_cluster_address="gcomm://GALERA1-IP,GALERA2-IP,GALERA3-IP"
|
||||
wsrep_sst_method=rsync
|
||||
|
||||
|
||||
|
||||
Configuring ``mysqld``
|
||||
-----------------------
|
||||
|
||||
While all of the configuration parameters available to the standard MySQL,
|
||||
MariaDB or Percona XtraDB database server are available in Galera Cluster,
|
||||
there are some that you must define at the outset to avoid conflicts or
unexpected behavior.
|
||||
|
||||
- Ensure that the database server is not bound only to the localhost,
  ``127.0.0.1``. Instead, bind it to ``0.0.0.0`` to ensure it listens on
  all available interfaces.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
bind-address=0.0.0.0
|
||||
|
||||
- Ensure that the binary log format is set to use row-level replication,
|
||||
as opposed to statement-level replication:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
binlog_format=ROW
|
||||
|
||||
|
||||
Configuring InnoDB
|
||||
-------------------
|
||||
|
||||
Galera Cluster does not support non-transactional storage engines and
|
||||
requires that you use InnoDB by default. There are some additional
|
||||
parameters that you must define to avoid conflicts.
|
||||
|
||||
- Ensure that the default storage engine is set to InnoDB:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
default_storage_engine=InnoDB
|
||||
|
||||
- Ensure that the InnoDB locking mode for generating auto-increment values
|
||||
is set to ``2``, which is the interleaved locking mode.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
innodb_autoinc_lock_mode=2
|
||||
|
||||
Do not change this value. Other modes may cause ``INSERT`` statements
on tables with auto-increment columns to fail, and may cause unresolved
deadlocks that leave the system unresponsive.
|
||||
|
||||
- Ensure that the InnoDB log buffer is written to file once per second,
|
||||
rather than on each commit, to improve performance:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
innodb_flush_log_at_trx_commit=0
|
||||
|
||||
Bear in mind that, while setting this parameter to ``0`` or ``2`` can improve
performance, it also introduces certain dangers. Operating system failures can
erase the last second of transactions. While you can recover this data
from another node, if the cluster goes down at the same time
(in the event of a data center power outage), you lose this data permanently.
|
||||
|
||||
- Define the InnoDB memory buffer pool size. The default value is 128 MB,
|
||||
but to compensate for Galera Cluster's additional memory usage, scale
|
||||
your usual value back by 5%:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
innodb_buffer_pool_size=122M
|
||||
|
||||
|
||||
Configuring wsrep replication
|
||||
------------------------------
|
||||
|
||||
Galera Cluster configuration parameters all have the ``wsrep_`` prefix.
|
||||
There are five that you must define for each cluster node in your
|
||||
OpenStack database.
|
||||
|
||||
- **wsrep Provider** The Galera Replication Plugin serves as the wsrep
|
||||
Provider for Galera Cluster. It is installed on your system as the
|
||||
``libgalera_smm.so`` file. You must define the path to this file in
|
||||
your ``my.cnf``.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
wsrep_provider="/usr/lib/libgalera_smm.so"
|
||||
|
||||
- **Cluster Name** Define an arbitrary name for your cluster.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
wsrep_cluster_name="my_example_cluster"
|
||||
|
||||
You must use the same name on every cluster node. The connection fails
|
||||
when this value does not match.
|
||||
|
||||
- **Cluster Address** List the IP addresses for each cluster node.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
wsrep_cluster_address="gcomm://192.168.1.1,192.168.1.2,192.168.1.3"
|
||||
|
||||
Replace the IP addresses given here with a comma-separated list of the
IP addresses of each OpenStack database server in your cluster.
|
||||
|
||||
- **Node Name** Define the logical name of the cluster node.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
wsrep_node_name="Galera1"
|
||||
|
||||
- **Node Address** Define the IP address of the cluster node.
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
wsrep_node_address="192.168.1.1"
|
||||
|
||||
|
||||
|
||||
|
||||
Additional parameters
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
For a complete list of the available parameters, run the
|
||||
``SHOW VARIABLES`` command from within the database client:
|
||||
|
||||
.. code-block:: mysql
|
||||
|
||||
SHOW VARIABLES LIKE 'wsrep_%';
|
||||
|
||||
+------------------------------+-------+
|
||||
| Variable_name | Value |
|
||||
+------------------------------+-------+
|
||||
| wsrep_auto_increment_control | ON |
|
||||
+------------------------------+-------+
|
||||
| wsrep_causal_reads | OFF |
|
||||
+------------------------------+-------+
|
||||
| wsrep_certify_nonPK | ON |
|
||||
+------------------------------+-------+
|
||||
| ... | ... |
|
||||
+------------------------------+-------+
|
||||
| wsrep_sync_wait | 0 |
|
||||
+------------------------------+-------+
|
||||
|
||||
For documentation about these parameters, the wsrep provider options, and the
status variables available in Galera Cluster, see the `Reference
<http://galeracluster.com/documentation-webpages/reference.html>`_.
|
doc/ha-guide/source/controller-ha-galera-install.rst (new file, 275 lines)
@@ -0,0 +1,275 @@
|
||||
Installation
|
||||
=============
|
||||
|
||||
Using Galera Cluster requires that you install two packages. The first is
|
||||
the database server, which must include the wsrep API patch. The second
|
||||
package is the Galera Replication Plugin, which enables the write-set
|
||||
replication service functionality with the database server.
|
||||
|
||||
There are three implementations of Galera Cluster: MySQL, MariaDB and
|
||||
Percona XtraDB. For each implementation, there is a software repository that
|
||||
provides binary packages for Debian, Red Hat, and SUSE-based Linux
|
||||
distributions.
|
||||
|
||||
|
||||
Enabling the repository
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Galera Cluster is not available in the base repositories of Linux
|
||||
distributions. In order to install it with your package manager, you must
|
||||
first enable the repository on your system. The particular methods for
|
||||
doing so vary depending on which distribution you use for OpenStack and
|
||||
which database server you want to use.
|
||||
|
||||
Debian
|
||||
-------
|
||||
|
||||
For Debian and Debian-based distributions, such as Ubuntu, complete the
|
||||
following steps:
|
||||
|
||||
#. Add the GnuPG key for the database repository that you want to use.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# apt-key adv --recv-keys --keyserver \
|
||||
keyserver.ubuntu.com BC19DDBA
|
||||
|
||||
Note that the particular key value in this command varies depending on
|
||||
which database software repository you want to use.
|
||||
|
||||
+--------------------------+------------------------+
|
||||
| Database | Key |
|
||||
+==========================+========================+
|
||||
| Galera Cluster for MySQL | ``BC19DDBA`` |
|
||||
+--------------------------+------------------------+
|
||||
| MariaDB Galera Cluster | ``0xcbcb082a1bb943db`` |
|
||||
+--------------------------+------------------------+
|
||||
| Percona XtraDB Cluster | ``1C4CBDCDCD2EFD2A`` |
|
||||
+--------------------------+------------------------+
|
||||
|
||||
#. Add the repository to your sources list. Using your preferred text
|
||||
editor, create a ``galera.list`` file in the ``/etc/apt/sources.list.d/``
|
||||
directory. For the contents of this file, use the lines that pertain to
|
||||
the software repository you want to install:
|
||||
|
||||
.. code-block:: linux-config
|
||||
|
||||
# Galera Cluster for MySQL
|
||||
deb http://releases.galeracluster.com/DISTRO RELEASE main
|
||||
|
||||
# MariaDB Galera Cluster
|
||||
deb http://mirror.jmu.edu/pub/mariadb/repo/VERSION/DISTRO RELEASE main
|
||||
|
||||
# Percona XtraDB Cluster
|
||||
deb http://repo.percona.com/apt RELEASE main
|
||||
|
||||
For each entry: Replace all instances of ``DISTRO`` with the distribution
|
||||
that you use, such as ``debian`` or ``ubuntu``. Replace all instances of
|
||||
``RELEASE`` with the release of that distribution, such as ``wheezy`` or
|
||||
``trusty``. Replace all instances of ``VERSION`` with the version of the
|
||||
database server that you want to install, such as ``5.6`` or ``10.0``.
|
||||
|
||||
.. note:: In the event that you do not know the release code-name for
|
||||
your distribution, you can use the following command to
|
||||
find it out:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ lsb_release -a
|
||||
|
||||
|
||||
#. Update the local cache.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# apt-get update
|
||||
|
||||
Packages in the Galera Cluster Debian repository are now available for
|
||||
installation on your system.
|
||||
|
||||
Red Hat
|
||||
--------
|
||||
|
||||
For Red Hat Enterprise Linux and Red Hat-based Linux distributions, the
process is more straightforward: create a repository file and enter only
the text for the repository you want to use.
|
||||
|
||||
- For Galera Cluster for MySQL, using your preferred text editor, create a
|
||||
``Galera.repo`` file in the ``/etc/yum.repos.d/`` directory.
|
||||
|
||||
.. code-block:: linux-config
|
||||
|
||||
[galera]
|
||||
name = Galera Cluster for MySQL
|
||||
baseurl = http://releases.galeracluster.com/DISTRO/RELEASE/ARCH
|
||||
gpgkey = http://releases.galeracluster.com/GPG-KEY-galeracluster.com
|
||||
gpgcheck = 1
|
||||
|
||||
Replace ``DISTRO`` with the name of the distribution you use, such as
|
||||
``centos`` or ``fedora``. Replace ``RELEASE`` with the release number,
|
||||
such as ``7`` for CentOS 7. Replace ``ARCH`` with your system
|
||||
architecture, such as ``x86_64``.
|
||||
|
||||
- For MariaDB Galera Cluster, using your preferred text editor, create a
|
||||
``Galera.repo`` file in the ``/etc/yum.repos.d/`` directory.
|
||||
|
||||
.. code-block:: linux-config
|
||||
|
||||
[mariadb]
|
||||
name = MariaDB Galera Cluster
|
||||
baseurl = http://yum.mariadb.org/VERSION/PACKAGE
|
||||
gpgkey = https://yum.mariadb.org/RPM-GPG-KEY-MariaDB
|
||||
gpgcheck = 1
|
||||
|
||||
Replace ``VERSION`` with the version of MariaDB you want to install, such
|
||||
as ``5.6`` or ``10.0``. Replace ``PACKAGE`` with the package type and
|
||||
architecture, such as ``rhel6-amd64`` for Red Hat 6 on 64-bit
|
||||
architecture.
|
||||
|
||||
- For Percona XtraDB Cluster, run the following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# yum install http://www.percona.com/downloads/percona-release/redhat/0.1-3/percona-release-0.1-3.noarch.rpm
|
||||
|
||||
Bear in mind that the Percona repository only supports Red Hat Enterprise
|
||||
Linux and CentOS distributions.
|
||||
|
||||
Packages in the Galera Cluster Red Hat repository are now available for
|
||||
installation on your system.
|
||||
|
||||
|
||||
|
||||
SUSE
|
||||
-----
|
||||
|
||||
For SUSE Enterprise Linux and SUSE-based distributions, such as openSUSE,
|
||||
binary installations are only available for Galera Cluster for MySQL and
|
||||
MariaDB Galera Cluster.
|
||||
|
||||
#. Create a ``Galera.repo`` file in the local directory. For Galera Cluster
|
||||
for MySQL, use the following content:
|
||||
|
||||
.. code-block:: linux-config
|
||||
|
||||
[galera]
|
||||
name = Galera Cluster for MySQL
|
||||
baseurl = http://releases.galeracluster.com/DISTRO/RELEASE
|
||||
gpgkey = http://releases.galeracluster.com/GPG-KEY-galeracluster.com
|
||||
gpgcheck = 1
|
||||
|
||||
In the text: Replace ``DISTRO`` with the name of the distribution you
|
||||
use, such as ``sles`` or ``opensuse``. Replace ``RELEASE`` with the
|
||||
version number of that distribution.
|
||||
|
||||
For MariaDB Galera Cluster, instead use this content:
|
||||
|
||||
.. code-block:: linux-config
|
||||
|
||||
[mariadb]
|
||||
name = MariaDB Galera Cluster
|
||||
baseurl = http://yum.mariadb.org/VERSION/PACKAGE
|
||||
gpgkey = https://yum.mariadb.org/RPM-GPG-KEY-MariaDB
|
||||
gpgcheck = 1
|
||||
|
||||
In the text: Replace ``VERSION`` with the version of MariaDB you want to
|
||||
install, such as ``5.6`` or ``10.0``. Replace ``PACKAGE`` with the package
|
||||
architecture you want to use, such as ``opensuse13-amd64``.
|
||||
|
||||
#. Add the repository to your system:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ sudo zypper addrepo Galera.repo
|
||||
|
||||
#. Refresh ``zypper``:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ sudo zypper refresh
|
||||
|
||||
Packages in the Galera Cluster SUSE repository are now available for
|
||||
installation.
|
||||
|
||||
|
||||
Installing Galera Cluster
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
When you finish enabling the software repository for Galera Cluster, you can
|
||||
install it using your package manager. The particular command and packages
|
||||
you need to install vary depending on which database server you want to
|
||||
install and which Linux distribution you use:
|
||||
|
||||
Galera Cluster for MySQL:
|
||||
|
||||
|
||||
- For Debian and Debian-based distributions, such as Ubuntu, run the
|
||||
following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# apt-get install galera-3 mysql-wsrep-5.6
|
||||
|
||||
- For Red Hat Enterprise Linux and Red Hat-based distributions, such as
|
||||
Fedora or CentOS, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# yum install galera-3 mysql-wsrep-5.6
|
||||
|
||||
- For SUSE Enterprise Linux Server and SUSE-based distributions, such as
|
||||
openSUSE, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# zypper install galera-3 mysql-wsrep-5.6
|
||||
|
||||
|
||||
MariaDB Galera Cluster:
|
||||
|
||||
- For Debian and Debian-based distributions, such as Ubuntu, run the
|
||||
following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# apt-get install galera mariadb-galera-server
|
||||
|
||||
- For Red Hat Enterprise Linux and Red Hat-based distributions, such as
|
||||
Fedora or CentOS, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# yum install galera MariaDB-Galera-server
|
||||
|
||||
- For SUSE Enterprise Linux Server and SUSE-based distributions, such as
|
||||
openSUSE, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# zypper install galera MariaDB-Galera-server
|
||||
|
||||
|
||||
Percona XtraDB Cluster:
|
||||
|
||||
|
||||
- For Debian and Debian-based distributions, such as Ubuntu, run the
|
||||
following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# apt-get install percona-xtradb-cluster
|
||||
|
||||
- For Red Hat Enterprise Linux and Red Hat-based distributions, such as
|
||||
Fedora or CentOS, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# yum install Percona-XtraDB-Cluster
|
||||
|
||||
Galera Cluster is now installed on your system. You must repeat this
|
||||
process for each controller node in your cluster.
|
||||
|
||||
.. warning:: In the event that you already installed the standalone version
|
||||
of MySQL, MariaDB or Percona XtraDB, this installation purges all
|
||||
privileges on your OpenStack database server. You must reapply the
|
||||
privileges listed in the installation guide.
|
doc/ha-guide/source/controller-ha-galera-manage.rst (new file, 255 lines)
@@ -0,0 +1,255 @@
|
||||
Management
|
||||
===========
|
||||
|
||||
When you finish the installation and configuration process on each
|
||||
cluster node in your OpenStack database, you can initialize Galera Cluster.
|
||||
|
||||
Before you attempt this, verify that you have the following ready
(a quick spot check is sketched after this list):
|
||||
|
||||
- Database hosts with Galera Cluster installed. You need a
|
||||
minimum of three hosts;
|
||||
- No firewalls between the hosts;
|
||||
- SELinux and AppArmor set to permit access to ``mysqld``;
|
||||
- The correct path to ``libgalera_smm.so`` given to the
|
||||
``wsrep_provider`` parameter.
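
As that quick spot check of the last two items on each node (a sketch only;
the library path shown is the default used elsewhere in this guide and may
differ on your system), confirm that the Galera library exists at the
configured path, then check the SELinux mode on Red Hat-based systems and
the AppArmor status on Ubuntu-based systems:

.. code-block:: console

   $ ls -l /usr/lib/libgalera_smm.so
   $ getenforce
   $ sudo aa-status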
|
||||
|
||||
Initializing the cluster
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In Galera Cluster, the Primary Component is the cluster of database
|
||||
servers that replicate into each other. In the event that a
|
||||
cluster node loses connectivity with the Primary Component, it
|
||||
defaults into a non-operational state, to avoid creating or serving
|
||||
inconsistent data.
|
||||
|
||||
By default, cluster nodes do not start as part of a Primary
Component. Instead they assume that one exists somewhere and
attempt to establish a connection with it. To create a Primary
Component, you must start one cluster node using the
``--wsrep-new-cluster`` option. You can do this using any cluster
node; it is not important which one you choose. In the Primary
Component, replication and state transfers bring all databases to
the same state.
|
||||
|
||||
To start the cluster, complete the following steps:
|
||||
|
||||
#. Initialize the Primary Component on one cluster node. For
|
||||
servers that use ``init``, run the following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# service mysql start --wsrep-new-cluster
|
||||
|
||||
For servers that use ``systemd``, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# systemctl start mysql --wsrep-new-cluster
|
||||
|
||||
#. Once the database server starts, check the cluster status using
|
||||
the ``wsrep_cluster_size`` status variable. From the database
|
||||
client, run the following command:
|
||||
|
||||
.. code-block:: mysql
|
||||
|
||||
SHOW STATUS LIKE 'wsrep_cluster_size';
|
||||
|
||||
+--------------------+-------+
|
||||
| Variable_name | Value |
|
||||
+--------------------+-------+
|
||||
| wsrep_cluster_size | 1 |
|
||||
+--------------------+-------+
|
||||
|
||||
#. Start the database server on all other cluster nodes. For
|
||||
servers that use ``init``, run the following command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# service mysql start
|
||||
|
||||
For servers that use ``systemd``, instead run this command:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# systemctl start mysql
|
||||
|
||||
#. When you have all cluster nodes started, log into the database
|
||||
client on one of them and check the ``wsrep_cluster_size``
|
||||
status variable again.
|
||||
|
||||
.. code-block:: mysql
|
||||
|
||||
SHOW STATUS LIKE 'wsrep_cluster_size';
|
||||
|
||||
+--------------------+-------+
|
||||
| Variable_name | Value |
|
||||
+--------------------+-------+
|
||||
| wsrep_cluster_size | 3 |
|
||||
+--------------------+-------+
|
||||
|
||||
When each cluster node starts, it checks the IP addresses given to
|
||||
the ``wsrep_cluster_address`` parameter and attempts to establish
|
||||
network connectivity with a database server running there. Once it
|
||||
establishes a connection, it attempts to join the Primary
|
||||
Component, requesting a state transfer as needed to bring itself
|
||||
into sync with the cluster.
|
||||
|
||||
In the event that you need to restart any cluster node, you can do
|
||||
so. When the database server comes back up, it establishes
|
||||
connectivity with the Primary Component and updates itself to any
|
||||
changes it may have missed while down.
|
||||
|
||||
|
||||
Restarting the cluster
|
||||
-----------------------
|
||||
|
||||
Individual cluster nodes can stop and be restarted without issue.
|
||||
When a database loses its connection or restarts, Galera Cluster
|
||||
brings it back into sync once it reestablishes connection with the
|
||||
Primary Component. In the event that you need to restart the
|
||||
entire cluster, identify the most advanced cluster node and
|
||||
initialize the Primary Component on that node.
|
||||
|
||||
To find the most advanced cluster node, you need to check the
|
||||
sequence numbers, or seqnos, on the last committed transaction for
|
||||
each. You can find these by viewing the ``grastate.dat`` file in the
database directory:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ cat /path/to/datadir/grastate.dat
|
||||
|
||||
# Galera saved state
|
||||
version: 3.8
|
||||
uuid: 5ee99582-bb8d-11e2-b8e3-23de375c1d30
|
||||
seqno: 8204503945773
|
||||
|
||||
Alternatively, if the database server is running, use the
|
||||
``wsrep_last_committed`` status variable:
|
||||
|
||||
.. code-block:: mysql
|
||||
|
||||
SHOW STATUS LIKE 'wsrep_last_committed';
|
||||
|
||||
+----------------------+--------+
|
||||
| Variable_name | Value |
|
||||
+----------------------+--------+
|
||||
| wsrep_last_committed | 409745 |
|
||||
+----------------------+--------+
|
||||
|
||||
This value increments with each transaction, so the most advanced
|
||||
node has the highest sequence number, and therefore is the most up to date.
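
For example, assuming the node above reported the highest
``wsrep_last_committed`` value and that your servers use ``systemd``
(a sketch; on ``init`` systems use the corresponding ``service`` command
shown earlier), re-initialize the Primary Component from that node:

.. code-block:: console

   # systemctl start mysql --wsrep-new-cluster

Then start the database server on the remaining nodes without the
``--wsrep-new-cluster`` option, exactly as during the initial cluster start.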
|
||||
|
||||
|
||||
Configuration tips
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
||||
Deployment strategies
|
||||
----------------------
|
||||
|
||||
Galera can be configured using one of the following
|
||||
strategies:
|
||||
|
||||
- Each instance has its own IP address;
|
||||
|
||||
OpenStack services are configured with the list of these IP
|
||||
addresses so they can select one of the addresses from those
|
||||
available.
|
||||
|
||||
- Galera runs behind HAProxy.
|
||||
|
||||
HAProxy load balances incoming requests and exposes just one IP
|
||||
address for all the clients.
|
||||
|
||||
Galera synchronous replication guarantees a zero slave lag. The
|
||||
failover procedure completes once HAProxy detects that the active
|
||||
back end has gone down and switches to the backup one, which is
|
||||
then marked as 'UP'. If no back ends are up (in other words, the
|
||||
Galera cluster is not ready to accept connections), the failover
|
||||
procedure finishes only when the Galera cluster has been
|
||||
successfully reassembled. The SLA is normally no more than 5
|
||||
minutes.
|
||||
|
||||
- Use MySQL/Galera in active/passive mode to avoid deadlocks on
|
||||
``SELECT ... FOR UPDATE`` type queries (used, for example, by nova
|
||||
and neutron). This issue is discussed more in the following:
|
||||
|
||||
- http://lists.openstack.org/pipermail/openstack-dev/2014-May/035264.html
|
||||
- http://www.joinfu.com/
|
||||
|
||||
Of these options, the second one is highly recommended. Although Galera
|
||||
supports active/active configurations, we recommend active/passive
|
||||
(enforced by the load balancer) in order to avoid lock contention.
|
||||
|
||||
|
||||
|
||||
Configuring HAProxy
|
||||
--------------------
|
||||
|
||||
If you use HAProxy for load-balancing client access to Galera
|
||||
Cluster as described in :doc:`controller-ha-haproxy`, you can
|
||||
use the ``clustercheck`` utility to improve health checks.
|
||||
|
||||
#. Create a configuration file for ``clustercheck`` at
|
||||
``/etc/sysconfig/clustercheck``:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
MYSQL_USERNAME="clustercheck_user"
|
||||
MYSQL_PASSWORD="my_clustercheck_password"
|
||||
MYSQL_HOST="localhost"
|
||||
MYSQL_PORT="3306"
|
||||
|
||||
#. Log in to the database client and grant the ``clustercheck`` user
|
||||
``PROCESS`` privileges.
|
||||
|
||||
.. code-block:: mysql
|
||||
|
||||
GRANT PROCESS ON *.* TO 'clustercheck_user'@'localhost'
|
||||
IDENTIFIED BY 'my_clustercheck_password';
|
||||
|
||||
FLUSH PRIVILEGES;
|
||||
|
||||
You only need to do this on one cluster node. Galera Cluster
|
||||
replicates the user to all the others.
|
||||
|
||||
#. Create a configuration file for the HAProxy monitor service, at
|
||||
``/etc/xinetd.d/galera-monitor``:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
service galera-monitor {
|
||||
port = 9200
|
||||
disable = no
|
||||
socket_type = stream
|
||||
protocol = tcp
|
||||
wait = no
|
||||
user = root
|
||||
group = root
|
||||
groups = yes
|
||||
server = /usr/bin/clustercheck
|
||||
type = UNLISTED
|
||||
per_source = UNLIMITED
|
||||
log_on_success =
|
||||
log_on_failure = HOST
|
||||
flags = REUSE
|
||||
}
|
||||
|
||||
#. Start the ``xinetd`` daemon for ``clustercheck``. For servers
|
||||
that use ``init``, run the following commands:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# service xinetd enable
|
||||
# service xinetd start
|
||||
|
||||
For servers that use ``systemd``, instead run these commands:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# systemctl daemon-reload
|
||||
# systemctl enable xinetd
|
||||
# systemctl start xinetd
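
Once ``xinetd`` is running, you can verify that the monitor responds on
port ``9200`` (a sketch; replace ``10.0.0.12`` with the address of one of
your controller nodes):

.. code-block:: console

   $ curl -i http://10.0.0.12:9200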
|
||||
|
||||
|
doc/ha-guide/source/controller-ha-galera.rst (new file, 33 lines)
@@ -0,0 +1,33 @@
|
||||
Database (Galera Cluster)
|
||||
==========================
|
||||
|
||||
The first step is to install the database that sits at the heart of the
|
||||
cluster. To implement high availability, run an instance of the database on
|
||||
each controller node and use Galera Cluster to provide replication between
|
||||
them. Galera Cluster is a synchronous multi-master database cluster, based
|
||||
on MySQL and the InnoDB storage engine. It is a high-availability service
|
||||
that provides high system uptime, no data loss, and scalability for growth.
|
||||
|
||||
You can achieve high availability for the OpenStack database in many
|
||||
different ways, depending on the type of database that you want to use.
|
||||
There are three implementations of Galera Cluster available to you:
|
||||
|
||||
- `Galera Cluster for MySQL <http://galeracluster.com/>`_ The MySQL
|
||||
reference implementation from Codership Oy;
|
||||
- `MariaDB Galera Cluster <https://mariadb.org/>`_ The MariaDB
|
||||
implementation of Galera Cluster, which is commonly supported in
|
||||
environments based on Red Hat distributions;
|
||||
- `Percona XtraDB Cluster <http://www.percona.com/>`_ The XtraDB
|
||||
implementation of Galera Cluster from Percona.
|
||||
|
||||
In addition to Galera Cluster, you can also achieve high availability
|
||||
through other database options, such as PostgreSQL, which has its own
|
||||
replication system.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
controller-ha-galera-install
|
||||
controller-ha-galera-config
|
||||
controller-ha-galera-manage
|
doc/ha-guide/source/controller-ha-haproxy.rst (new file, 229 lines)
@@ -0,0 +1,229 @@
|
||||
=======
|
||||
HAProxy
|
||||
=======
|
||||
|
||||
HAProxy provides a fast and reliable HTTP reverse proxy and load balancer
|
||||
for TCP or HTTP applications. It is particularly suited for web sites crawling
|
||||
under very high loads while needing persistence or Layer 7 processing.
|
||||
It realistically supports tens of thousands of connections with recent
|
||||
hardware.
|
||||
|
||||
Each instance of HAProxy configures its front end to accept connections
only on the virtual IP (VIP) address, and uses as its back end a list of
all instances of the corresponding service under load balancing,
such as any OpenStack API service.
|
||||
|
||||
This makes the instances of HAProxy act independently and fail over
transparently together with the network endpoints (VIP addresses),
and therefore share the same SLA.
|
||||
|
||||
You can alternatively use a commercial load balancer, which is hardware-
or software-based. A hardware load balancer generally has good performance.
|
||||
|
||||
For detailed instructions about installing HAProxy on your nodes,
|
||||
see its `official documentation <http://www.haproxy.org/#docs>`_.
|
||||
|
||||
.. note::
|
||||
|
||||
HAProxy should not be a single point of failure.
|
||||
It is advisable to have multiple HAProxy instances running,
|
||||
where the number of these instances is a small odd number like 3 or 5.
|
||||
You need to ensure its availability by other means,
|
||||
such as Keepalived or Pacemaker.
|
||||
|
||||
The common practice is to locate an HAProxy instance on each OpenStack
|
||||
controller in the environment.
|
||||
|
||||
Once configured (see example file below), add HAProxy to the cluster
|
||||
and ensure the VIPs can only run on machines where HAProxy is active:
|
||||
|
||||
``pcs``
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ pcs resource create lb-haproxy systemd:haproxy --clone
|
||||
$ pcs constraint order start p_api-ip then lb-haproxy-clone kind=Optional
|
||||
$ pcs constraint colocation add p_api-ip with lb-haproxy-clone
|
||||
|
||||
``crmsh``
|
||||
|
||||
TBA
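
Until the official ``crmsh`` commands are added here, the following is a
rough equivalent of the ``pcs`` example above (the resource and clone names,
such as ``p_api-ip`` and ``lb-haproxy-clone``, are carried over from that
example as assumptions; verify the syntax against your ``crmsh`` version):

.. code-block:: console

   $ crm configure primitive lb-haproxy systemd:haproxy op monitor interval=30s
   $ crm configure clone lb-haproxy-clone lb-haproxy
   $ crm configure order order-api-ip-haproxy Optional: p_api-ip lb-haproxy-clone
   $ crm configure colocation colo-api-ip-haproxy inf: p_api-ip lb-haproxy-clone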
|
||||
|
||||
Example Config File
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Here is an example ``/etc/haproxy/haproxy.cfg`` configuration file.
|
||||
You need a copy of it on each controller node.
|
||||
|
||||
.. note::
|
||||
|
||||
To implement any changes made to this file, you must restart the HAProxy service.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
global
|
||||
chroot /var/lib/haproxy
|
||||
daemon
|
||||
group haproxy
|
||||
maxconn 4000
|
||||
pidfile /var/run/haproxy.pid
|
||||
user haproxy
|
||||
|
||||
defaults
|
||||
log global
|
||||
maxconn 4000
|
||||
option redispatch
|
||||
retries 3
|
||||
timeout http-request 10s
|
||||
timeout queue 1m
|
||||
timeout connect 10s
|
||||
timeout client 1m
|
||||
timeout server 1m
|
||||
timeout check 10s
|
||||
|
||||
listen dashboard_cluster
|
||||
bind <Virtual IP>:443
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:443 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:443 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:443 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen galera_cluster
|
||||
bind <Virtual IP>:3306
|
||||
balance source
|
||||
option httpchk
|
||||
server controller1 10.0.0.12:3306 check port 9200 inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:3306 backup check port 9200 inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:3306 backup check port 9200 inter 2000 rise 2 fall 5
|
||||
|
||||
listen glance_api_cluster
|
||||
bind <Virtual IP>:9292
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:9292 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:9292 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:9292 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen glance_registry_cluster
|
||||
bind <Virtual IP>:9191
|
||||
balance source
|
||||
option tcpka
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:9191 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:9191 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:9191 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen keystone_admin_cluster
|
||||
bind <Virtual IP>:35357
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:35357 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:35357 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:35357 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen keystone_public_internal_cluster
|
||||
bind <Virtual IP>:5000
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:5000 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:5000 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:5000 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen nova_ec2_api_cluster
|
||||
bind <Virtual IP>:8773
|
||||
balance source
|
||||
option tcpka
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:8773 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:8773 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:8773 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen nova_compute_api_cluster
|
||||
bind <Virtual IP>:8774
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:8774 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:8774 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:8774 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen nova_metadata_api_cluster
|
||||
bind <Virtual IP>:8775
|
||||
balance source
|
||||
option tcpka
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:8775 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:8775 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:8775 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen cinder_api_cluster
|
||||
bind <Virtual IP>:8776
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:8776 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:8776 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:8776 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen ceilometer_api_cluster
|
||||
bind <Virtual IP>:8777
|
||||
balance source
|
||||
option tcpka
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:8777 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:8777 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:8777 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen nova_vncproxy_cluster
|
||||
bind <Virtual IP>:6080
|
||||
balance source
|
||||
option tcpka
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:6080 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:6080 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:6080 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen neutron_api_cluster
|
||||
bind <Virtual IP>:9696
|
||||
balance source
|
||||
option tcpka
|
||||
option httpchk
|
||||
option tcplog
|
||||
server controller1 10.0.0.12:9696 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:9696 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:9696 check inter 2000 rise 2 fall 5
|
||||
|
||||
listen swift_proxy_cluster
|
||||
bind <Virtual IP>:8080
|
||||
balance source
|
||||
option tcplog
|
||||
option tcpka
|
||||
server controller1 10.0.0.12:8080 check inter 2000 rise 2 fall 5
|
||||
server controller2 10.0.0.13:8080 check inter 2000 rise 2 fall 5
|
||||
server controller3 10.0.0.14:8080 check inter 2000 rise 2 fall 5
|
||||
|
||||
.. note::
|
||||
|
||||
The Galera cluster configuration directive ``backup`` indicates
|
||||
that two of the three controllers are standby nodes.
|
||||
This ensures that only one node services write requests
|
||||
because OpenStack support for multi-node writes is not yet production-ready.
|
||||
|
||||
.. note::
|
||||
|
||||
The Telemetry API service configuration does not have the ``option httpchk``
|
||||
directive as it cannot process this check properly.
|
||||
TODO: explain why the Telemetry API is so special
|
||||
|
||||
[TODO: we need more commentary about the contents and format of this file]
|
147
doc/ha-guide/source/controller-ha-keystone.rst
Normal file
@ -0,0 +1,147 @@
|
||||
|
||||
============================
|
||||
Identity services (keystone)
|
||||
============================
|
||||
|
||||
OpenStack Identity (keystone)
|
||||
is the Identity service in OpenStack that is used by many services.
|
||||
You should be familiar with
|
||||
`OpenStack identity concepts
|
||||
<http://docs.openstack.org/liberty/install-guide-ubuntu/common/get_started_identity.html>`_
|
||||
before proceeding.
|
||||
|
||||
Making the OpenStack Identity service highly available
|
||||
in active / passive mode involves:
|
||||
|
||||
- :ref:`keystone-pacemaker`
|
||||
- :ref:`keystone-config-identity`
|
||||
- :ref:`keystone-services-config`
|
||||
|
||||
.. _keystone-pacemaker:
|
||||
|
||||
Add OpenStack Identity resource to Pacemaker
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#. You must first download the OCF resource agent for the OpenStack Identity service
|
||||
by running the following commands:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# cd /usr/lib/ocf/resource.d
|
||||
# mkdir openstack
|
||||
# cd openstack
|
||||
# wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/keystone
|
||||
# chmod a+rx *
|
||||
|
||||
#. You can now add the Pacemaker configuration
|
||||
for the OpenStack Identity resource
|
||||
by running the :command:`crm configure` command
|
||||
to connect to the Pacemaker cluster.
|
||||
Add the following cluster resources:
|
||||
|
||||
::
|
||||
|
||||
primitive p_keystone ocf:openstack:keystone \
|
||||
params config="/etc/keystone/keystone.conf" \
|
||||
os_password="secretsecret" \
|
||||
os_username="admin" \
|
||||
os_tenant_name="admin" \
|
||||
os_auth_url="http://10.0.0.11:5000/v2.0/" \
|
||||
op monitor interval="30s" timeout="30s"
|
||||
|
||||
This configuration creates ``p_keystone``,
|
||||
a resource for managing the OpenStack Identity service.
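If you manage the cluster with ``pcs`` instead of ``crmsh``, a roughly
equivalent resource definition could be (a sketch only, reusing the same
OCF agent and parameters as above):

.. code-block:: console

   $ pcs resource create p_keystone ocf:openstack:keystone \
     config="/etc/keystone/keystone.conf" os_password="secretsecret" \
     os_username="admin" os_tenant_name="admin" \
     os_auth_url="http://10.0.0.11:5000/v2.0/" \
     op monitor interval=30s timeout=30s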
|
||||
|
||||
:command:`crm configure` supports batch input
|
||||
so you may copy and paste the above lines
|
||||
into your live Pacemaker configuration,
|
||||
and then make changes as required.
|
||||
For example, you may enter ``edit p_ip_keystone``
|
||||
from the :command:`crm configure` menu
|
||||
and edit the resource to match your preferred virtual IP address.
|
||||
|
||||
#. After you add these resources,
|
||||
commit your configuration changes by entering :command:`commit`
|
||||
from the :command:`crm configure` menu.
|
||||
Pacemaker then starts the OpenStack Identity service
|
||||
and its dependent resources on one of your nodes.
|
||||
|
||||
.. _keystone-config-identity:
|
||||
|
||||
Configure OpenStack Identity service
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#. Edit the :file:`keystone.conf` file
|
||||
to change the values of the :manpage:`bind(2)` parameters:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
bind_host = 10.0.0.11
|
||||
public_bind_host = 10.0.0.11
|
||||
admin_bind_host = 10.0.0.11
|
||||
|
||||
The ``admin_bind_host`` parameter
|
||||
lets you use a private network for admin access.
|
||||
|
||||
#. To be sure that all data is highly available,
|
||||
ensure that everything is stored in the MySQL database
|
||||
(which is also highly available):
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
[catalog]
|
||||
driver = keystone.catalog.backends.sql.Catalog
|
||||
...
|
||||
[identity]
|
||||
driver = keystone.identity.backends.sql.Identity
|
||||
...
|
||||
|
||||
|
||||
.. _keystone-services-config:
|
||||
|
||||
Configure OpenStack services to use the highly available OpenStack Identity
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Your OpenStack services must now point
|
||||
their OpenStack Identity configuration
|
||||
to the highly available virtual cluster IP address
|
||||
rather than point to the physical IP address
|
||||
of an OpenStack Identity server as you would do
|
||||
in a non-HA environment.
|
||||
|
||||
#. For OpenStack Compute, for example,
|
||||
if your OpenStack Identity service IP address is 10.0.0.11,
|
||||
use the following configuration in your :file:`api-paste.ini` file:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
auth_host = 10.0.0.11
|
||||
|
||||
#. You also need to create the OpenStack Identity Endpoint
|
||||
with this IP address.
|
||||
|
||||
.. note::
|
||||
|
||||
If you are using both private and public IP addresses,
|
||||
you should create two virtual IP addresses
|
||||
and define your endpoint like this:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ openstack endpoint create --region $KEYSTONE_REGION \
|
||||
$service-type public http://PUBLIC_VIP:5000/v2.0
|
||||
$ openstack endpoint create --region $KEYSTONE_REGION \
|
||||
$service-type admin http://10.0.0.11:35357/v2.0
|
||||
$ openstack endpoint create --region $KEYSTONE_REGION \
|
||||
$service-type internal http://10.0.0.11:5000/v2.0
|
||||
|
||||
|
||||
#. If you are using the horizon dashboard,
|
||||
edit the :file:`local_settings.py` file
|
||||
to include the following:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
OPENSTACK_HOST = 10.0.0.11
|
||||
|
||||
|
21
doc/ha-guide/source/controller-ha-memcached.rst
Normal file
@ -0,0 +1,21 @@
|
||||
===================
|
||||
Memcached
|
||||
===================
|
||||
|
||||
Memcached is a general-purpose distributed memory caching system. It
|
||||
is used to speed up dynamic database-driven websites by caching data
|
||||
and objects in RAM to reduce the number of times an external data
|
||||
source must be read.
|
||||
|
||||
Memcached is a memory cache daemon that can be used by most OpenStack
|
||||
services to store ephemeral data, such as tokens.
|
||||
|
||||
Access to memcached is not handled by HAProxy because replicated
|
||||
access is currently only in an experimental state. Instead OpenStack
|
||||
services must be supplied with the full list of hosts running
|
||||
memcached.
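For example, a service can be pointed at memcached on all three controller
nodes with a setting similar to the following (the exact option name and
configuration section vary by service and release):

.. code-block:: ini

   memcached_servers = controller1:11211,controller2:11211,controller3:11211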
|
||||
|
||||
The Memcached client implements hashing to balance objects among the
|
||||
instances. Failure of an instance only impacts a percentage of the
|
||||
objects and the client automatically removes it from the list of
|
||||
instances. The SLA is several minutes.
|
597
doc/ha-guide/source/controller-ha-pacemaker.rst
Normal file
@ -0,0 +1,597 @@
|
||||
=======================
|
||||
Pacemaker cluster stack
|
||||
=======================
|
||||
|
||||
`Pacemaker <http://clusterlabs.org/>`_ cluster stack is the state-of-the-art
|
||||
high availability and load balancing stack for the Linux platform.
|
||||
Pacemaker is useful to make OpenStack infrastructure highly available.
|
||||
Also, it is storage and application-agnostic, and in no way
|
||||
specific to OpenStack.
|
||||
|
||||
Pacemaker relies on the
|
||||
`Corosync <http://corosync.github.io/corosync/>`_ messaging layer
|
||||
for reliable cluster communications.
|
||||
Corosync implements the Totem single-ring ordering and membership protocol.
|
||||
It also provides UDP and InfiniBand based messaging,
|
||||
quorum, and cluster membership to Pacemaker.
|
||||
|
||||
Pacemaker does not inherently (need or want to) understand the
|
||||
applications it manages. Instead, it relies on resource agents (RAs),
|
||||
scripts that encapsulate the knowledge of how to start, stop, and
|
||||
check the health of each application managed by the cluster.
|
||||
|
||||
These agents must conform to one of the `OCF <https://github.com/ClusterLabs/
|
||||
OCF-spec/blob/master/ra/resource-agent-api.md>`_,
|
||||
`SysV Init <http://refspecs.linux-foundation.org/LSB_3.0.0/LSB-Core-generic/
|
||||
LSB-Core-generic/iniscrptact.html>`_, Upstart, or Systemd standards.
|
||||
|
||||
Pacemaker ships with a large set of OCF agents (such as those managing
|
||||
MySQL databases, virtual IP addresses, and RabbitMQ), but can also use
|
||||
any agents already installed on your system and can be extended with
|
||||
your own (see the
|
||||
`developer guide <http://www.linux-ha.org/doc/dev-guides/ra-dev-guide.html>`_).
|
||||
|
||||
The steps to implement the Pacemaker cluster stack are:
|
||||
|
||||
- :ref:`pacemaker-install`
|
||||
- :ref:`pacemaker-corosync-setup`
|
||||
- :ref:`pacemaker-corosync-start`
|
||||
- :ref:`pacemaker-start`
|
||||
- :ref:`pacemaker-cluster-properties`
|
||||
|
||||
.. _pacemaker-install:
|
||||
|
||||
Install packages
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
On any host that is meant to be part of a Pacemaker cluster,
|
||||
you must first establish cluster communications
|
||||
through the Corosync messaging layer.
|
||||
This involves installing the following packages
|
||||
(and their dependencies, which your package manager
|
||||
usually installs automatically):
|
||||
|
||||
- pacemaker
|
||||
|
||||
- pcs (CentOS or RHEL) or crmsh
|
||||
|
||||
- corosync
|
||||
|
||||
- fence-agents (CentOS or RHEL) or cluster-glue
|
||||
|
||||
- resource-agents
|
||||
|
||||
- libqb0
|
||||
|
||||
.. _pacemaker-corosync-setup:
|
||||
|
||||
Set up the cluster with `pcs`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
#. Make sure pcs is running and configured to start at boot time:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ systemctl enable pcsd
|
||||
$ systemctl start pcsd
|
||||
|
||||
#. Set a password for hacluster user **on each host**.
|
||||
|
||||
Since the cluster is a single administrative domain, it is generally
|
||||
accepted to use the same password on all nodes.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ echo my-secret-password-no-dont-use-this-one \
|
||||
| passwd --stdin hacluster
|
||||
|
||||
#. Use that password to authenticate to the nodes which will
|
||||
make up the cluster. The :option:`-p` option is used to give
|
||||
the password on the command line and makes it easier to script.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ pcs cluster auth controller1 controller2 controller3 \
|
||||
-u hacluster -p my-secret-password-no-dont-use-this-one --force
|
||||
|
||||
#. Create the cluster, giving it a name, and start it:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ pcs cluster setup --force --name my-first-openstack-cluster \
|
||||
controller1 controller2 controller3
|
||||
$ pcs cluster start --all
|
||||
|
||||
.. note ::
|
||||
|
||||
In Red Hat Enterprise Linux or CentOS environments, this is a recommended
|
||||
path to perform configuration. For more information, see the `RHEL docs
|
||||
<https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/7/html/High_Availability_Add-On_Reference/ch-clusteradmin-HAAR.html#s1-clustercreate-HAAR>`_.
|
||||
|
||||
Set up the cluster with `crmsh`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
After installing the Corosync package, you must create
|
||||
the :file:`/etc/corosync/corosync.conf` configuration file.
|
||||
|
||||
.. note::
|
||||
For Ubuntu, you should also enable the Corosync service
|
||||
in the ``/etc/default/corosync`` configuration file.
|
||||
|
||||
Corosync can be configured to work
|
||||
with either multicast or unicast IP addresses
|
||||
or to use the votequorum library.
|
||||
|
||||
- :ref:`corosync-multicast`
|
||||
- :ref:`corosync-unicast`
|
||||
- :ref:`corosync-votequorum`
|
||||
|
||||
.. _corosync-multicast:
|
||||
|
||||
Set up Corosync with multicast
|
||||
------------------------------
|
||||
|
||||
Most distributions ship an example configuration file
|
||||
(:file:`corosync.conf.example`)
|
||||
as part of the documentation bundled with the Corosync package.
|
||||
An example Corosync configuration file is shown below:
|
||||
|
||||
**Example Corosync configuration file for multicast (corosync.conf)**
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
totem {
|
||||
version: 2
|
||||
|
||||
# Time (in ms) to wait for a token (1)
|
||||
token: 10000
|
||||
|
||||
# How many token retransmits before forming a new
|
||||
# configuration
|
||||
token_retransmits_before_loss_const: 10
|
||||
|
||||
# Turn off the virtual synchrony filter
|
||||
vsftype: none
|
||||
|
||||
# Enable encryption (2)
|
||||
secauth: on
|
||||
|
||||
# How many threads to use for encryption/decryption
|
||||
threads: 0
|
||||
|
||||
# This specifies the redundant ring protocol, which may be
|
||||
# none, active, or passive. (3)
|
||||
rrp_mode: active
|
||||
|
||||
# The following is a two-ring multicast configuration. (4)
|
||||
interface {
|
||||
ringnumber: 0
|
||||
bindnetaddr: 10.0.0.0
|
||||
mcastaddr: 239.255.42.1
|
||||
mcastport: 5405
|
||||
}
|
||||
interface {
|
||||
ringnumber: 1
|
||||
bindnetaddr: 10.0.42.0
|
||||
mcastaddr: 239.255.42.2
|
||||
mcastport: 5405
|
||||
}
|
||||
}
|
||||
|
||||
amf {
|
||||
mode: disabled
|
||||
}
|
||||
|
||||
service {
|
||||
# Load the Pacemaker Cluster Resource Manager (5)
|
||||
ver: 1
|
||||
name: pacemaker
|
||||
}
|
||||
|
||||
aisexec {
|
||||
user: root
|
||||
group: root
|
||||
}
|
||||
|
||||
logging {
|
||||
fileline: off
|
||||
to_stderr: yes
|
||||
to_logfile: no
|
||||
to_syslog: yes
|
||||
syslog_facility: daemon
|
||||
debug: off
|
||||
timestamp: on
|
||||
logger_subsys {
|
||||
subsys: AMF
|
||||
debug: off
|
||||
tags: enter|leave|trace1|trace2|trace3|trace4|trace6
|
||||
}}
|
||||
|
||||
Note the following:
|
||||
|
||||
- The ``token`` value specifies the time, in milliseconds,
|
||||
during which the Corosync token is expected
|
||||
to be transmitted around the ring.
|
||||
When this timeout expires, the token is declared lost,
|
||||
and after ``token_retransmits_before_loss_const`` lost tokens,
|
||||
the non-responding processor (cluster node) is declared dead.
|
||||
In other words, ``token × token_retransmits_before_loss_const``
|
||||
is the maximum time a node is allowed to not respond to cluster messages
|
||||
before being considered dead.
|
||||
The default for token is 1000 milliseconds (1 second),
|
||||
with 4 allowed retransmits.
|
||||
These defaults are intended to minimize failover times,
|
||||
but can cause frequent "false alarms" and unintended failovers
|
||||
in case of short network interruptions. The values used here are safer,
|
||||
albeit with slightly extended failover times.
|
||||
|
||||
- With ``secauth`` enabled,
|
||||
Corosync nodes mutually authenticate using a 128-byte shared secret
|
||||
stored in the :file:`/etc/corosync/authkey` file,
|
||||
which may be generated with the :command:`corosync-keygen` utility.
|
||||
When using ``secauth``, cluster communications are also encrypted.
|
||||
|
||||
- In Corosync configurations using redundant networking
|
||||
(with more than one interface),
|
||||
you must select a Redundant Ring Protocol (RRP) mode other than none.
|
||||
``active`` is the recommended RRP mode.
|
||||
|
||||
Note the following about the recommended interface configuration:
|
||||
|
||||
- Each configured interface must have a unique ``ringnumber``,
|
||||
starting with 0.
|
||||
|
||||
- The ``bindnetaddr`` is the network address of the interfaces to bind to.
|
||||
The example uses two network addresses of /24 IPv4 subnets.
|
||||
|
||||
- Multicast groups (``mcastaddr``) must not be reused
|
||||
across cluster boundaries.
|
||||
In other words, no two distinct clusters
|
||||
should ever use the same multicast group.
|
||||
Be sure to select multicast addresses compliant with
|
||||
`RFC 2365, "Administratively Scoped IP Multicast"
|
||||
<http://www.ietf.org/rfc/rfc2365.txt>`_.
|
||||
|
||||
- For firewall configurations,
|
||||
note that Corosync communicates over UDP only,
|
||||
and uses ``mcastport`` (for receives)
|
||||
and ``mcastport - 1`` (for sends).
|
||||
|
||||
- The service declaration for the pacemaker service
|
||||
may be placed in the :file:`corosync.conf` file directly
|
||||
or in its own separate file, :file:`/etc/corosync/service.d/pacemaker`.
|
||||
|
||||
.. note::
|
||||
|
||||
If you are using Corosync version 2 on Ubuntu 14.04,
|
||||
remove or comment out lines under the service stanza,
|
||||
which enables Pacemaker to start up. Another potential
|
||||
problem is the boot and shutdown order of Corosync and
|
||||
Pacemaker. To force Pacemaker to start after Corosync and
|
||||
stop before Corosync, fix the start and kill symlinks manually:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# update-rc.d pacemaker start 20 2 3 4 5 . stop 00 0 1 6 .
|
||||
|
||||
The Pacemaker service also requires an additional
|
||||
configuration file ``/etc/corosync/uidgid.d/pacemaker``
|
||||
to be created with the following content:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
uidgid {
|
||||
uid: hacluster
|
||||
gid: haclient
|
||||
}
|
||||
|
||||
- Once created, the :file:`corosync.conf` file
|
||||
(and the :file:`authkey` file if the secauth option is enabled)
|
||||
must be synchronized across all cluster nodes.
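For example, you can generate the key on one node and then push both files
to the other cluster members (a sketch; the host names are illustrative):

.. code-block:: console

   # corosync-keygen
   # scp /etc/corosync/authkey controller2:/etc/corosync/
   # scp /etc/corosync/authkey controller3:/etc/corosync/
   # scp /etc/corosync/corosync.conf controller2:/etc/corosync/
   # scp /etc/corosync/corosync.conf controller3:/etc/corosync/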
|
||||
|
||||
.. _corosync-unicast:
|
||||
|
||||
Set up Corosync with unicast
|
||||
----------------------------
|
||||
|
||||
For environments that do not support multicast,
|
||||
Corosync should be configured for unicast.
|
||||
An example fragment of the :file:`corosync.conf` file
|
||||
for unicast is shown below:
|
||||
|
||||
**Corosync configuration file fragment for unicast (corosync.conf)**
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
totem {
|
||||
#...
|
||||
interface {
|
||||
ringnumber: 0
|
||||
bindnetaddr: 10.0.0.0
|
||||
broadcast: yes (1)
|
||||
mcastport: 5405
|
||||
}
|
||||
interface {
|
||||
ringnumber: 1
|
||||
bindnetaddr: 10.0.42.0
|
||||
broadcast: yes
|
||||
mcastport: 5405
|
||||
}
|
||||
transport: udpu (2)
|
||||
}
|
||||
|
||||
nodelist { (3)
|
||||
node {
|
||||
ring0_addr: 10.0.0.12
|
||||
ring1_addr: 10.0.42.12
|
||||
nodeid: 1
|
||||
}
|
||||
node {
|
||||
ring0_addr: 10.0.0.13
|
||||
ring1_addr: 10.0.42.13
|
||||
nodeid: 2
|
||||
}
|
||||
node {
|
||||
ring0_addr: 10.0.0.14
|
||||
ring1_addr: 10.0.42.14
|
||||
nodeid: 3
|
||||
}
|
||||
}
|
||||
#...
|
||||
|
||||
Note the following:
|
||||
|
||||
- If the ``broadcast`` parameter is set to yes,
|
||||
the broadcast address is used for communication.
|
||||
If this option is set, the ``mcastaddr`` parameter should not be set.
|
||||
|
||||
- The ``transport`` directive controls the transport mechanism used.
|
||||
To avoid the use of multicast entirely,
|
||||
specify the ``udpu`` unicast transport parameter.
|
||||
This requires specifying the list of members
|
||||
in the ``nodelist`` directive;
|
||||
which means the membership can be determined before deployment.
|
||||
The default is ``udp``.
|
||||
The transport type can also be set to ``udpu`` or ``iba``.
|
||||
|
||||
- Within the ``nodelist`` directive,
|
||||
it is possible to specify specific information
|
||||
about the nodes in the cluster.
|
||||
The directive can contain only the node sub-directive,
|
||||
which specifies every node that should be a member of the membership,
|
||||
and where non-default options are needed.
|
||||
Every node must have at least the ``ring0_addr`` field filled.
|
||||
|
||||
.. note::
|
||||
|
||||
For UDPU, every node that should be a member
|
||||
of the membership must be specified.
|
||||
|
||||
Possible options are:
|
||||
|
||||
- ``ring{X}_addr`` specifies the IP address of one of the nodes.
|
||||
{X} is the ring number.
|
||||
|
||||
- ``nodeid`` is optional
|
||||
when using IPv4 and required when using IPv6.
|
||||
This is a 32-bit value specifying the node identifier
|
||||
delivered to the cluster membership service.
|
||||
If this is not specified with IPv4,
|
||||
the node id is determined from the 32-bit IP address
|
||||
of the system to which the system is bound with ring identifier of 0.
|
||||
The node identifier value of zero is reserved and should not be used.
|
||||
|
||||
|
||||
.. _corosync-votequorum:
|
||||
|
||||
Set up Corosync with votequorum library
|
||||
---------------------------------------
|
||||
|
||||
The votequorum library is part of the corosync project.
|
||||
It provides an interface to the vote-based quorum service
|
||||
and it must be explicitly enabled in the Corosync configuration file.
|
||||
The main role of votequorum library is to avoid split-brain situations,
|
||||
but it also provides a mechanism to:
|
||||
|
||||
- Query the quorum status
|
||||
|
||||
- Get a list of nodes known to the quorum service
|
||||
|
||||
- Receive notifications of quorum state changes
|
||||
|
||||
- Change the number of votes assigned to a node
|
||||
|
||||
- Change the number of expected votes for a cluster to be quorate
|
||||
|
||||
- Connect an additional quorum device
|
||||
to allow small clusters remain quorate during node outages
|
||||
|
||||
The votequorum library has been created to replace and eliminate
|
||||
qdisk, the disk-based quorum daemon for CMAN,
|
||||
from advanced cluster configurations.
|
||||
|
||||
A sample votequorum service configuration
|
||||
in the :file:`corosync.conf` file is:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
quorum {
|
||||
provider: corosync_votequorum (1)
|
||||
expected_votes: 7 (2)
|
||||
wait_for_all: 1 (3)
|
||||
last_man_standing: 1 (4)
|
||||
last_man_standing_window: 10000 (5)
|
||||
}
|
||||
|
||||
Note the following:
|
||||
|
||||
- Specifying ``corosync_votequorum`` enables the votequorum library;
|
||||
this is the only required option.
|
||||
|
||||
- The cluster is fully operational with ``expected_votes`` set to 7 nodes
|
||||
(each node has 1 vote), quorum: 4.
|
||||
If a list of nodes is specified as ``nodelist``,
|
||||
the ``expected_votes`` value is ignored.
|
||||
|
||||
- Setting ``wait_for_all`` to 1 means that,
|
||||
when starting up a cluster (all nodes down),
|
||||
the cluster quorum is held until all nodes are online
|
||||
and have joined the cluster for the first time.
|
||||
This parameter is new in Corosync 2.0.
|
||||
|
||||
- Setting ``last_man_standing`` to 1 enables
|
||||
the Last Man Standing (LMS) feature;
|
||||
by default, it is disabled (set to 0).
|
||||
If a cluster is on the quorum edge
|
||||
(``expected_votes:`` set to 7; ``online nodes:`` set to 4)
|
||||
for longer than the time specified
|
||||
for the ``last_man_standing_window`` parameter,
|
||||
the cluster can recalculate quorum and continue operating
|
||||
even if the next node is lost.
|
||||
This logic is repeated until the number of online nodes
|
||||
in the cluster reaches 2.
|
||||
In order to allow the cluster to step down from 2 members to only 1,
|
||||
the ``auto_tie_breaker`` parameter needs to be set;
|
||||
this is not recommended for production environments.
|
||||
|
||||
- ``last_man_standing_window`` specifies the time, in milliseconds,
|
||||
required to recalculate quorum after one or more hosts
|
||||
have been lost from the cluster.
|
||||
To do the new quorum recalculation,
|
||||
the cluster must have quorum for at least the interval
|
||||
specified for ``last_man_standing_window``;
|
||||
the default is 10000ms.
|
||||
|
||||
|
||||
.. _pacemaker-corosync-start:
|
||||
|
||||
Start Corosync
|
||||
--------------
|
||||
|
||||
Corosync is started as a regular system service.
|
||||
Depending on your distribution, it may ship with an LSB init script,
|
||||
an upstart job, or a systemd unit file.
|
||||
Either way, the service is usually named corosync:
|
||||
|
||||
- :command:`# /etc/init.d/corosync start` (LSB)
|
||||
- :command:`# service corosync start` (LSB, alternate)
|
||||
- :command:`# start corosync` (upstart)
|
||||
- :command:`# systemctl start corosync` (systemd)
|
||||
|
||||
You can now check the Corosync connectivity with two tools.
|
||||
|
||||
Use the :command:`corosync-cfgtool` utility with the :option:`-s` option
|
||||
to get a summary of the health of the communication rings:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# corosync-cfgtool -s
|
||||
Printing ring status.
|
||||
Local node ID 435324542
|
||||
RING ID 0
|
||||
id = 10.0.0.82
|
||||
status = ring 0 active with no faults
|
||||
RING ID 1
|
||||
id = 10.0.42.100
|
||||
status = ring 1 active with no faults
|
||||
|
||||
Use the :command:`corosync-objctl` utility
|
||||
to dump the Corosync cluster member list:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# corosync-objctl runtime.totem.pg.mrp.srp.members
|
||||
runtime.totem.pg.mrp.srp.435324542.ip=r(0) ip(10.0.0.82) r(1) ip(10.0.42.100)
|
||||
runtime.totem.pg.mrp.srp.435324542.join_count=1
|
||||
runtime.totem.pg.mrp.srp.435324542.status=joined
|
||||
runtime.totem.pg.mrp.srp.983895584.ip=r(0) ip(10.0.0.87) r(1) ip(10.0.42.254)
|
||||
runtime.totem.pg.mrp.srp.983895584.join_count=1
|
||||
runtime.totem.pg.mrp.srp.983895584.status=joined
|
||||
|
||||
You should see a ``status=joined`` entry
|
||||
for each of your constituent cluster nodes.
|
||||
|
||||
[TODO: Should the main example now use corosync-cmapctl and have the note
|
||||
give the command for Corosync version 1?]
|
||||
|
||||
.. note::
|
||||
|
||||
If you are using Corosync version 2, use the :command:`corosync-cmapctl`
|
||||
utility instead of :command:`corosync-objctl`; it is a direct replacement.
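For example, to show only the membership entries (equivalent to the
``corosync-objctl`` output above):

.. code-block:: console

   # corosync-cmapctl | grep members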
|
||||
|
||||
.. _pacemaker-start:
|
||||
|
||||
Start Pacemaker
|
||||
---------------
|
||||
|
||||
After the Corosync services have been started
|
||||
and you have verified that the cluster is communicating properly,
|
||||
you can start :command:`pacemakerd`, the Pacemaker master control process:
|
||||
|
||||
- :command:`# /etc/init.d/pacemaker start` (LSB)
|
||||
|
||||
- :command:`# service pacemaker start` (LSB, alternate)
|
||||
|
||||
- :command:`# start pacemaker` (upstart)
|
||||
|
||||
- :command:`# systemctl start pacemaker` (systemd)
|
||||
|
||||
After the Pacemaker services have started,
|
||||
Pacemaker creates a default empty cluster configuration with no resources.
|
||||
Use the :command:`crm_mon` utility to observe the status of Pacemaker:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
============
|
||||
Last updated: Sun Oct 7 21:07:52 2012
|
||||
Last change: Sun Oct 7 20:46:00 2012 via cibadmin on controller2
|
||||
Stack: openais
|
||||
Current DC: controller2 - partition with quorum
|
||||
Version: 1.1.6-9971ebba4494012a93c03b40a2c58ec0eb60f50c
|
||||
3 Nodes configured, 3 expected votes
|
||||
0 Resources configured.
|
||||
============
|
||||
|
||||
Online: [ controller3 controller2 controller1 ]
|
||||
|
||||
.. _pacemaker-cluster-properties:
|
||||
|
||||
Set basic cluster properties
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
After you set up your Pacemaker cluster,
|
||||
you should set a few basic cluster properties:
|
||||
|
||||
``crmsh``
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ crm configure property pe-warn-series-max="1000" \
|
||||
pe-input-series-max="1000" \
|
||||
pe-error-series-max="1000" \
|
||||
cluster-recheck-interval="5min"
|
||||
|
||||
``pcs``
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ pcs property set pe-warn-series-max=1000 \
|
||||
pe-input-series-max=1000 \
|
||||
pe-error-series-max=1000 \
|
||||
cluster-recheck-interval=5min
|
||||
|
||||
Note the following:
|
||||
|
||||
- Setting the ``pe-warn-series-max``, ``pe-input-series-max``
|
||||
and ``pe-error-series-max`` parameters to 1000
|
||||
instructs Pacemaker to keep a longer history of the inputs processed
|
||||
and errors and warnings generated by its Policy Engine.
|
||||
This history is useful if you need to troubleshoot the cluster.
|
||||
|
||||
- Pacemaker uses an event-driven approach to cluster state processing.
|
||||
The ``cluster-recheck-interval`` parameter (which defaults to 15 minutes)
|
||||
defines the interval at which certain Pacemaker actions occur.
|
||||
It is usually prudent to reduce this to a shorter interval,
|
||||
such as 5 or 3 minutes.
|
||||
|
||||
After you make these changes, you may commit the updated configuration.
|
310
doc/ha-guide/source/controller-ha-rabbitmq.rst
Normal file
@ -0,0 +1,310 @@
|
||||
========
|
||||
RabbitMQ
|
||||
========
|
||||
|
||||
An AMQP (Advanced Message Queuing Protocol) compliant message bus is
|
||||
required for most OpenStack components in order to coordinate the
|
||||
execution of jobs entered into the system.
|
||||
|
||||
The most popular AMQP implementation used in OpenStack installations
|
||||
is RabbitMQ.
|
||||
|
||||
RabbitMQ nodes fail over on both the application and the
|
||||
infrastructure layers.
|
||||
|
||||
The application layer is controlled by the ``oslo.messaging``
|
||||
configuration options for multiple AMQP hosts. If the AMQP node fails,
|
||||
the application reconnects to the next one configured within the
|
||||
specified reconnect interval. The specified reconnect interval
|
||||
constitutes its SLA.
|
||||
|
||||
On the infrastructure layer, the SLA is the time it takes the RabbitMQ
|
||||
cluster to reassemble. Several cases are possible. The Mnesia keeper
|
||||
node is the master of the corresponding Pacemaker resource for
|
||||
RabbitMQ; when it fails, the result is a full AMQP cluster downtime
|
||||
interval. Normally, its SLA is no more than several minutes. Failure
|
||||
of another node that is a slave of the corresponding Pacemaker
|
||||
resource for RabbitMQ results in no AMQP cluster downtime at all.
|
||||
|
||||
Making the RabbitMQ service highly available involves the following steps:
|
||||
|
||||
- :ref:`Install RabbitMQ<rabbitmq-install>`
|
||||
|
||||
- :ref:`Configure RabbitMQ for HA queues<rabbitmq-configure>`
|
||||
|
||||
- :ref:`Configure OpenStack services to use Rabbit HA queues
|
||||
<rabbitmq-services>`
|
||||
|
||||
.. note::
|
||||
|
||||
Access to RabbitMQ is not normally handled by HAProxy. Instead,
|
||||
consumers must be supplied with the full list of hosts running
|
||||
RabbitMQ by using the ``rabbit_hosts`` option and must enable the ``rabbit_ha_queues``
|
||||
option.
|
||||
|
||||
Jon Eck found the `core issue
|
||||
<http://people.redhat.com/jeckersb/private/vip-failover-tcp-persist.html>`_
|
||||
and went into some detail regarding the `history and solution
|
||||
<http://john.eckersberg.com/improving-ha-failures-with-tcp-timeouts.html>`_
|
||||
on his blog.
|
||||
|
||||
In summary though:
|
||||
|
||||
The source address for the connection from HAProxy back to the
|
||||
client is the VIP address. However the VIP address is no longer
|
||||
present on the host. This means that the network (IP) layer
|
||||
deems the packet unroutable, and informs the transport (TCP)
|
||||
layer. TCP, however, is a reliable transport. It knows how to
|
||||
handle transient errors and will retry. And so it does.
|
||||
|
||||
In this case that is a problem though, because:
|
||||
|
||||
TCP generally holds on to hope for a long time. A ballpark
|
||||
estimate is somewhere on the order of tens of minutes (30
|
||||
minutes is commonly referenced). During this time it will keep
|
||||
probing and trying to deliver the data.
|
||||
|
||||
It is important to note that HAProxy has no idea that any of this is
|
||||
happening. As far as its process is concerned, it called
|
||||
``write()`` with the data and the kernel returned success. The
|
||||
resolution is already understood and just needs to make its way
|
||||
through a review.
|
||||
|
||||
.. _rabbitmq-install:
|
||||
|
||||
Install RabbitMQ
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The commands for installing RabbitMQ are specific to the Linux distribution
|
||||
you are using:
|
||||
|
||||
.. list-table:: Install RabbitMQ
|
||||
:widths: 15 30
|
||||
:header-rows: 1
|
||||
|
||||
* - Distribution
|
||||
- Command
|
||||
* - Ubuntu, Debian
|
||||
- :command:`# apt-get install rabbitmq-server`
|
||||
* - RHEL, Fedora, CentOS
|
||||
- :command:`# yum install rabbitmq-server`
|
||||
* - openSUSE
|
||||
- :command:`# zypper install rabbitmq-server`
|
||||
* - SLES 12
|
||||
- :command:`# zypper addrepo -f obs://Cloud:OpenStack:Kilo/SLE_12 Kilo`
|
||||
|
||||
[Verify fingerprint of imported GPG key; see below]
|
||||
|
||||
:command:`# zypper install rabbitmq-server`
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
For SLES 12, the packages are signed by GPG key 893A90DAD85F9316.
|
||||
You should verify the fingerprint of the imported GPG key before using it.
|
||||
|
||||
::
|
||||
|
||||
Key ID: 893A90DAD85F9316
|
||||
Key Name: Cloud:OpenStack OBS Project <Cloud:OpenStack@build.opensuse.org>
|
||||
Key Fingerprint: 35B34E18ABC1076D66D5A86B893A90DAD85F9316
|
||||
Key Created: Tue Oct 8 13:34:21 2013
|
||||
Key Expires: Thu Dec 17 13:34:21 2015
|
||||
|
||||
For more information,
|
||||
see the official installation manual for the distribution:
|
||||
|
||||
- `Debian and Ubuntu <http://www.rabbitmq.com/install-debian.html>`_
|
||||
- `RPM based <http://www.rabbitmq.com/install-rpm.html>`_
|
||||
(RHEL, Fedora, CentOS, openSUSE)
|
||||
|
||||
.. _rabbitmq-configure:
|
||||
|
||||
Configure RabbitMQ for HA queues
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
[TODO: This section should begin with a brief mention
|
||||
about what HA queues are and why they are valuable, etc]
|
||||
|
||||
We are building a cluster of RabbitMQ nodes to construct a RabbitMQ broker,
|
||||
which is a logical grouping of several Erlang nodes.
|
||||
|
||||
The following components/services can work with HA queues:
|
||||
|
||||
[TODO: replace "currently" with specific release names]
|
||||
|
||||
[TODO: Does this list need to be updated? Perhaps we need a table
|
||||
that shows each component and the earliest release that allows it
|
||||
to work with HA queues.]
|
||||
|
||||
- OpenStack Compute
|
||||
- OpenStack Block Storage
|
||||
- OpenStack Networking
|
||||
- Telemetry
|
||||
|
||||
We have to consider that, while exchanges and bindings
|
||||
survive the loss of individual nodes,
|
||||
queues and their messages do not
|
||||
because a queue and its contents are located on one node.
|
||||
If we lose this node, we also lose the queue.
|
||||
|
||||
Mirrored queues in RabbitMQ improve
|
||||
the availability of services, since they are resilient to failures.
|
||||
|
||||
Production servers should run (at least) three RabbitMQ servers;
|
||||
for testing and demonstration purposes,
|
||||
it is possible to run only two servers.
|
||||
In this section, we configure two nodes,
|
||||
called ``rabbit1`` and ``rabbit2``.
|
||||
To build a broker, we need to ensure
|
||||
that all nodes have the same Erlang cookie file.
|
||||
|
||||
[TODO: Should the example instead use a minimum of three nodes?]
|
||||
|
||||
#. To do so, stop RabbitMQ everywhere and copy the cookie
|
||||
from the first node to each of the other node(s):
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# scp /var/lib/rabbitmq/.erlang.cookie root@NODE:/var/lib/rabbitmq/.erlang.cookie
|
||||
|
||||
#. On each target node, verify the correct owner,
|
||||
group, and permissions of the :file:`.erlang.cookie` file.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie
|
||||
# chmod 400 /var/lib/rabbitmq/.erlang.cookie
|
||||
|
||||
#. Start the message queue service on all nodes and configure it to start
|
||||
when the system boots.
|
||||
|
||||
On Ubuntu, it is configured by default.
|
||||
|
||||
On CentOS, RHEL, openSUSE, and SLES:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# systemctl enable rabbitmq-server.service
|
||||
# systemctl start rabbitmq-server.service
|
||||
|
||||
#. Verify that the nodes are running:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# rabbitmqctl cluster_status
|
||||
Cluster status of node rabbit@NODE...
|
||||
[{nodes,[{disc,[rabbit@NODE]}]},
|
||||
{running_nodes,[rabbit@NODE]},
|
||||
{partitions,[]}]
|
||||
...done.
|
||||
|
||||
#. Run the following commands on each node except the first one:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# rabbitmqctl stop_app
|
||||
Stopping node rabbit@NODE...
|
||||
...done.
|
||||
# rabbitmqctl join_cluster --ram rabbit@rabbit1
|
||||
# rabbitmqctl start_app
|
||||
Starting node rabbit@NODE ...
|
||||
...done.
|
||||
|
||||
.. note::
|
||||
|
||||
The default node type is a disc node. In this guide, nodes
|
||||
join the cluster as RAM nodes.
|
||||
|
||||
#. To verify the cluster status:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# rabbitmqctl cluster_status
|
||||
Cluster status of node rabbit@NODE...
|
||||
[{nodes,[{disc,[rabbit@rabbit1]},{ram,[rabbit@NODE]}]}, \
|
||||
{running_nodes,[rabbit@NODE,rabbit@rabbit1]}]
|
||||
|
||||
If the cluster is working,
|
||||
you can create usernames and passwords for the queues.
|
||||
|
||||
#. To ensure that all queues except those with auto-generated names
|
||||
are mirrored across all running nodes,
|
||||
set the ``ha-mode`` policy key to all
|
||||
by running the following command on one of the nodes:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# rabbitmqctl set_policy ha-all '^(?!amq\.).*' '{"ha-mode": "all"}'
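To confirm that the policy is in place, you can list the configured
policies (the output format varies by RabbitMQ version):

.. code-block:: console

   # rabbitmqctl list_policies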
|
||||
|
||||
More information is available in the RabbitMQ documentation:
|
||||
|
||||
- `Highly Available Queues <http://www.rabbitmq.com/ha.html>`_
|
||||
- `Clustering Guide <https://www.rabbitmq.com/clustering.html>`_
|
||||
|
||||
.. note::
|
||||
|
||||
As another option to make RabbitMQ highly available, RabbitMQ contains the
|
||||
OCF scripts for the Pacemaker cluster resource agents since version 3.5.7.
|
||||
It provides the active/active RabbitMQ cluster with mirrored queues.
|
||||
For more information, see `Auto-configuration of a cluster with
|
||||
a Pacemaker <http://www.rabbitmq.com/pacemaker.html>`_.
|
||||
|
||||
.. _rabbitmq-services:
|
||||
|
||||
Configure OpenStack services to use Rabbit HA queues
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
We have to configure the OpenStack components
|
||||
to use at least two RabbitMQ nodes.
|
||||
|
||||
Do this configuration on all services using RabbitMQ:
|
||||
|
||||
#. RabbitMQ HA cluster host:port pairs:
|
||||
|
||||
::
|
||||
|
||||
rabbit_hosts=rabbit1:5672,rabbit2:5672,rabbit3:5672
|
||||
|
||||
#. How frequently to retry connecting with RabbitMQ:
|
||||
[TODO: document the unit of measure here? Seconds?]
|
||||
|
||||
::
|
||||
|
||||
rabbit_retry_interval=1
|
||||
|
||||
#. How long to back-off for between retries when connecting to RabbitMQ:
|
||||
[TODO: document the unit of measure here? Seconds?]
|
||||
|
||||
::
|
||||
|
||||
rabbit_retry_backoff=2
|
||||
|
||||
#. Maximum retries with trying to connect to RabbitMQ (infinite by default):
|
||||
|
||||
::
|
||||
|
||||
rabbit_max_retries=0
|
||||
|
||||
#. Use durable queues in RabbitMQ:
|
||||
|
||||
::
|
||||
|
||||
rabbit_durable_queues=true
|
||||
|
||||
#. Use HA queues in RabbitMQ (x-ha-policy: all):
|
||||
|
||||
::
|
||||
|
||||
rabbit_ha_queues=true
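Taken together, the options above might appear in a service's configuration
file as follows (a sketch; depending on the release, these options belong in
the ``[DEFAULT]`` or ``[oslo_messaging_rabbit]`` section):

.. code-block:: ini

   rabbit_hosts = rabbit1:5672,rabbit2:5672,rabbit3:5672
   rabbit_retry_interval = 1
   rabbit_retry_backoff = 2
   rabbit_max_retries = 0
   rabbit_durable_queues = true
   rabbit_ha_queues = true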
|
||||
|
||||
.. note::
|
||||
|
||||
If you change the configuration from an old set-up
|
||||
that did not use HA queues, you should restart the service:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# rabbitmqctl stop_app
|
||||
# rabbitmqctl reset
|
||||
# rabbitmqctl start_app
|
78
doc/ha-guide/source/controller-ha-telemetry.rst
Normal file
@ -0,0 +1,78 @@
|
||||
|
||||
=========
|
||||
Telemetry
|
||||
=========
|
||||
|
||||
[TODO (Add Telemetry overview)]
|
||||
|
||||
Telemetry central agent
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The Telemetry central agent can be configured to partition its polling
|
||||
workload between multiple agents, enabling high availability.
|
||||
|
||||
Both the central and the compute agent can run in an HA deployment,
|
||||
which means that multiple instances of these services can run in
|
||||
parallel with workload partitioning among these running instances.
|
||||
|
||||
The `Tooz <https://pypi.python.org/pypi/tooz>`__ library provides
|
||||
the coordination within the groups of service instances.
|
||||
It provides an API above several back ends that can be used for building
|
||||
distributed applications.
|
||||
|
||||
Tooz supports
|
||||
`various drivers <http://docs.openstack.org/developer/tooz/drivers.html>`__
|
||||
including the following back end solutions:
|
||||
|
||||
* `Zookeeper <http://zookeeper.apache.org/>`__.
|
||||
Recommended solution by the Tooz project.
|
||||
|
||||
* `Redis <http://redis.io/>`__.
|
||||
Recommended solution by the Tooz project.
|
||||
|
||||
* `Memcached <http://memcached.org/>`__.
|
||||
Recommended for testing.
|
||||
|
||||
You must configure a supported Tooz driver for the HA deployment of
|
||||
the Telemetry services.
|
||||
|
||||
For information about the required configuration options that have
|
||||
to be set in the :file:`ceilometer.conf` configuration file for both
|
||||
the central and compute agents, see the `coordination section
|
||||
<http://docs.openstack.org/liberty/config-reference/content/
|
||||
ch_configuring-openstack-telemetry.html>`__
|
||||
in the OpenStack Configuration Reference.
|
||||
|
||||
.. note:: Without the ``backend_url`` option being set, only one
|
||||
instance of both the central and compute agent service is able to run
|
||||
and function correctly.
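For illustration, a Redis-backed coordination setup in the
:file:`ceilometer.conf` file might look like this (the host name and the
choice of driver are assumptions; see the Tooz documentation for the exact
URL format of other drivers):

.. code-block:: ini

   [coordination]
   backend_url = redis://controller:6379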
|
||||
|
||||
The availability check of the instances is provided by heartbeat messages.
|
||||
When the connection with an instance is lost, the workload will be
|
||||
reassigned among the remaining instances in the next polling cycle.
|
||||
|
||||
.. note:: Memcached uses a timeout value, which should always be set to
|
||||
a value that is higher than the heartbeat value set for Telemetry.
|
||||
|
||||
For backward compatibility and supporting existing deployments, the central
|
||||
agent configuration also supports using different configuration files for
|
||||
groups of service instances of this type that are running in parallel.
|
||||
To enable this configuration, set a value for the ``partitioning_group_prefix``
|
||||
option in the `central section <http://docs.openstack.org/liberty/
|
||||
config-reference/content/ch_configuring-openstack-telemetry.html>`__
|
||||
in the OpenStack Configuration Reference.
|
||||
|
||||
.. warning:: For each sub-group of the central agent pool with the same
|
||||
``partitioning_group_prefix`` a disjoint subset of meters must be polled --
|
||||
otherwise samples may be missing or duplicated. The list of meters to poll
|
||||
can be set in the :file:`/etc/ceilometer/pipeline.yaml` configuration file.
|
||||
For more information about pipelines see the `Data collection and
|
||||
processing
|
||||
<http://docs.openstack.org/admin-guide/telemetry-data-collection.html#data-collection-and-processing>`__
|
||||
section.
|
||||
|
||||
To enable the compute agent to run multiple instances simultaneously with
|
||||
workload partitioning, the workload_partitioning option has to be set to
|
||||
``True`` under the `compute section <http://docs.openstack.org/liberty/
|
||||
config-reference/content/ch_configuring-openstack-telemetry.html>`__
|
||||
in the :file:`ceilometer.conf` configuration file.
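For example (a minimal sketch of the relevant section):

.. code-block:: ini

   [compute]
   workload_partitioning = True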
|
24
doc/ha-guide/source/controller-ha-vip.rst
Normal file
@ -0,0 +1,24 @@
|
||||
|
||||
=================
|
||||
Configure the VIP
|
||||
=================
|
||||
|
||||
You must select and assign a virtual IP address (VIP)
|
||||
that can freely float between cluster nodes.
|
||||
|
||||
This configuration creates ``vip``,
|
||||
a virtual IP address for use by the API node (``10.0.0.11``):
|
||||
|
||||
For ``crmsh``:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
primitive vip ocf:heartbeat:IPaddr2 \
|
||||
params ip="10.0.0.11" cidr_netmask="24" op monitor interval="30s"
|
||||
|
||||
For ``pcs``:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# pcs resource create vip ocf:heartbeat:IPaddr2 \
|
||||
params ip="10.0.0.11" cidr_netmask="24" op monitor interval="30s"
|
20
doc/ha-guide/source/controller-ha.rst
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
================================================
|
||||
Configuring the controller for high availability
|
||||
================================================
|
||||
|
||||
The cloud controller runs on the management network
|
||||
and must talk to all other services.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
controller-ha-pacemaker.rst
|
||||
controller-ha-vip.rst
|
||||
controller-ha-haproxy.rst
|
||||
controller-ha-galera.rst
|
||||
controller-ha-memcached.rst
|
||||
controller-ha-rabbitmq.rst
|
||||
controller-ha-keystone.rst
|
||||
controller-ha-telemetry.rst
|
||||
|
BIN
doc/ha-guide/source/figures/Cluster-deployment-collapsed.png
Normal file
Binary file not shown.
Size: 223 KiB
BIN
doc/ha-guide/source/figures/Cluster-deployment-segregated.png
Normal file
Binary file not shown.
Size: 215 KiB
BIN
doc/ha-guide/source/figures/keepalived-arch.jpg
Normal file
Binary file not shown.
Size: 52 KiB
47
doc/ha-guide/source/hardware-ha-basic.rst
Normal file
@ -0,0 +1,47 @@
|
||||
|
||||
==============
|
||||
Hardware setup
|
||||
==============
|
||||
|
||||
The standard hardware requirements:
|
||||
|
||||
- `Provider networks <http://docs.openstack.org/liberty/install-guide-ubuntu/overview.html#networking-option-1-provider-networks>`_
|
||||
- `Self-service networks <http://docs.openstack.org/liberty/install-guide-ubuntu/overview.html#networking-option-2-self-service-networks>`_
|
||||
|
||||
However, OpenStack does not require a significant amount of resources
|
||||
and the following minimum requirements should support
|
||||
a proof-of-concept high availability environment
|
||||
with core services and several instances:
|
||||
|
||||
[TODO: Verify that these numbers are good]
|
||||
|
||||
+-------------------+------------+----------+-----------+------+
|
||||
| Node type | Processor | Memory | Storage | NIC |
|
||||
+===================+============+==========+===========+======+
|
||||
| controller node | 1-2 | 8 GB | 100 GB | 2 |
|
||||
+-------------------+------------+----------+-----------+------+
|
||||
| compute node | 2-4+ | 8+ GB | 100+ GB | 2 |
|
||||
+-------------------+------------+----------+-----------+------+
|
||||
|
||||
|
||||
For demonstrations and studying,
|
||||
you can set up a test environment on virtual machines (VMs).
|
||||
This has the following benefits:
|
||||
|
||||
- One physical server can support multiple nodes,
|
||||
each of which supports almost any number of network interfaces.
|
||||
|
||||
- Ability to take periodic "snapshots" throughout the installation process
|
||||
and "roll back" to a working configuration in the event of a problem.
|
||||
|
||||
However, running an OpenStack environment on VMs
|
||||
degrades the performance of your instances,
|
||||
particularly if your hypervisor and/or processor lacks support
|
||||
for hardware acceleration of nested VMs.
|
||||
|
||||
.. note::
|
||||
|
||||
When installing highly-available OpenStack on VMs,
|
||||
be sure that your hypervisor permits promiscuous mode
|
||||
and disables MAC address filtering on the external network.
|
||||
|
15
doc/ha-guide/source/hardware-ha.rst
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
=============================================
|
||||
Hardware considerations for high availability
|
||||
=============================================
|
||||
|
||||
[TODO: Provide a minimal architecture example for HA,
|
||||
expanded on that given in
|
||||
http://docs.openstack.org/liberty/install-guide-ubuntu/environment.html
|
||||
for easy comparison]
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
hardware-ha-basic.rst
|
43
doc/ha-guide/source/index.rst
Normal file
@ -0,0 +1,43 @@
|
||||
=================================
|
||||
OpenStack High Availability Guide
|
||||
=================================
|
||||
|
||||
Abstract
|
||||
~~~~~~~~
|
||||
|
||||
This guide describes how to install and configure
|
||||
OpenStack for high availability.
|
||||
It supplements the OpenStack Installation Guides
|
||||
and assumes that you are familiar with the material in those guides.
|
||||
|
||||
This guide documents OpenStack Mitaka, OpenStack Liberty, and OpenStack
|
||||
Kilo releases.
|
||||
|
||||
.. warning:: This guide is a work-in-progress and changing rapidly
|
||||
while we continue to test and enhance the guidance. Please note
|
||||
where there are open "to do" items and help where you are able.
|
||||
|
||||
Contents
|
||||
~~~~~~~~
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
common/conventions.rst
|
||||
intro-ha.rst
|
||||
hardware-ha.rst
|
||||
install-ha.rst
|
||||
networking-ha.rst
|
||||
controller-ha.rst
|
||||
storage-ha.rst
|
||||
compute-node-ha.rst
|
||||
noncore-ha.rst
|
||||
|
||||
|
||||
common/app_support.rst
|
||||
common/glossary.rst
|
||||
|
||||
Search in this guide
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :ref:`search`
|
42
doc/ha-guide/source/install-ha-memcached.rst
Normal file
@ -0,0 +1,42 @@
|
||||
|
||||
=================
|
||||
Install memcached
|
||||
=================
|
||||
|
||||
[TODO: Verify that Oslo supports hash synchronization;
|
||||
if so, this should not take more than load balancing.]
|
||||
|
||||
[TODO: This hands off to two different docs for install information.
|
||||
We should choose one or explain the specific purpose of each.]
|
||||
|
||||
Most OpenStack services can use memcached
|
||||
to store ephemeral data such as tokens.
|
||||
Although memcached does not support
|
||||
typical forms of redundancy such as clustering,
|
||||
OpenStack services can use almost any number of instances
|
||||
by configuring multiple hostnames or IP addresses.
|
||||
The memcached client implements hashing
|
||||
to balance objects among the instances.
|
||||
Failure of an instance only impacts a percentage of the objects
|
||||
and the client automatically removes it from the list of instances.
|
||||
|
||||
To install and configure memcached, read the
|
||||
`official documentation <https://code.google.com/p/memcached/wiki/NewStart>`_.
|
||||
|
||||
Memory caching is managed by `oslo.cache
|
||||
<http://specs.openstack.org/openstack/oslo-specs/specs/kilo/oslo-cache-using-dogpile.html>`_
|
||||
so the way to use multiple memcached servers is the same for all projects.
|
||||
|
||||
[TODO: Should this show three hosts?]
|
||||
|
||||
Example configuration with two hosts:
|
||||
|
||||
::
|
||||
|
||||
memcached_servers = controller1:11211,controller2:11211
|
||||
|
||||
By default, `controller1` handles the caching service but,
|
||||
if the host goes down, `controller2` does the job.
|
||||
For more information about memcached installation,
|
||||
see the `OpenStack Administrator Guide
|
||||
<http://docs.openstack.org/admin-guide/>`_.
|
9
doc/ha-guide/source/install-ha-ntp.rst
Normal file
@ -0,0 +1,9 @@
|
||||
=============
|
||||
Configure NTP
|
||||
=============
|
||||
|
||||
You must configure NTP to properly synchronize services among nodes.
|
||||
We recommend that you configure the controller node to reference
|
||||
more accurate (lower stratum) servers and other nodes to reference
|
||||
the controller node. For more information, see the
|
||||
`Install Guides <http://docs.openstack.org/#install-guides>`_.
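For example, with chrony (one possible NTP implementation; adapt this if you
use ntpd), the non-controller nodes could reference the controller with a
single directive in the chrony configuration file (path varies by
distribution):

.. code-block:: none

   # /etc/chrony/chrony.conf or /etc/chrony.conf
   server controller iburst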
|
24
doc/ha-guide/source/install-ha-os.rst
Normal file
@ -0,0 +1,24 @@
|
||||
=====================================
|
||||
Install operating system on each node
|
||||
=====================================
|
||||
|
||||
The first step in setting up your highly-available OpenStack cluster
|
||||
is to install the operating system on each node.
|
||||
Follow the instructions in the OpenStack Installation Guides:
|
||||
|
||||
- `CentOS and RHEL <http://docs.openstack.org/liberty/install-guide-rdo/environment.html>`_
|
||||
- `openSUSE and SUSE Linux Enterprise Server <http://docs.openstack.org/liberty/install-guide-obs/environment.html>`_
|
||||
- `Ubuntu <http://docs.openstack.org/liberty/install-guide-ubuntu/environment.html>`_
|
||||
|
||||
The OpenStack Installation Guides also include a list of the services
|
||||
that use passwords with important notes about using them.
|
||||
|
||||
This guide uses the following example IP addresses:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
# controller
|
||||
10.0.0.11 controller # virtual IP
|
||||
10.0.0.12 controller1
|
||||
10.0.0.13 controller2
|
||||
10.0.0.14 controller3
|
12
doc/ha-guide/source/install-ha.rst
Normal file
@ -0,0 +1,12 @@
|
||||
=====================================
|
||||
Installing high availability packages
|
||||
=====================================
|
||||
|
||||
[TODO -- write intro to this section]
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
install-ha-os.rst
|
||||
install-ha-memcached.rst
|
||||
install-ha-ntp.rst
|
96
doc/ha-guide/source/intro-ha-arch-keepalived.rst
Normal file
96
doc/ha-guide/source/intro-ha-arch-keepalived.rst
Normal file
@ -0,0 +1,96 @@
|
||||
============================
|
||||
The keepalived architecture
|
||||
============================
|
||||
|
||||
High availability strategies
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The following diagram shows a very simplified view of the different
|
||||
strategies used to achieve high availability for the OpenStack
|
||||
services:
|
||||
|
||||
.. image:: /figures/keepalived-arch.jpg
|
||||
:width: 100%
|
||||
|
||||
Depending on the method used to communicate with the service, the
|
||||
following availability strategies will be followed:
|
||||
|
||||
- Keepalived, for the HAProxy instances.
|
||||
- Access via an HAProxy virtual IP, for services such as HTTPd that
|
||||
are accessed via a TCP socket that can be load balanced
|
||||
- Built-in application clustering, when available from the application.
|
||||
Galera is one example of this.
|
||||
- Starting up one instance of the service on several controller nodes,
|
||||
when they can coexist and coordinate by other means. RPC in
|
||||
``nova-conductor`` is one example of this.
|
||||
- No high availability, when the service can only work in
|
||||
active/passive mode.
|
||||
|
||||
There are known issues with cinder-volume that make it advisable to run it
as active/passive for now; see:
https://blueprints.launchpad.net/cinder/+spec/cinder-volume-active-active-support

While there will be multiple neutron LBaaS agents running, each agent
manages a set of load balancers that cannot be failed over to
another node.
|
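As a minimal sketch of the first strategy, a VRRP instance protecting the
HAProxy virtual IP might be defined in :file:`/etc/keepalived/keepalived.conf`
as follows. The interface name, router ID, priority, and VIP are assumptions
for illustration; the backup node would use ``state BACKUP`` and a lower
priority:

.. code-block:: none

   vrrp_script chk_haproxy {
       # Consider this node healthy only while an haproxy process is running
       script "killall -0 haproxy"
       interval 2
   }

   vrrp_instance VIP_1 {
       state MASTER
       interface eth0
       virtual_router_id 51
       priority 101
       advert_int 1
       virtual_ipaddress {
           10.0.0.11
       }
       track_script {
           chk_haproxy
       }
   }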
||||
|
||||
Architecture limitations
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This architecture has some inherent limitations that should be kept in
|
||||
mind during deployment and daily operations.
|
||||
The following sections describe these limitations.
|
||||
|
||||
#. Keepalived and network partitions
|
||||
|
||||
In case of a network partitioning, there is a chance that two or
|
||||
more nodes running keepalived claim to hold the same VIP, which may
|
||||
lead to an undesired behaviour. Since keepalived uses VRRP over
|
||||
multicast to elect a master (VIP owner), a network partition in
|
||||
which keepalived nodes cannot communicate will result in the VIPs
|
||||
existing on two nodes. When the network partition is resolved, the
|
||||
duplicate VIPs should also be resolved. Note that this network
|
||||
partition problem with VRRP is a known limitation for this
|
||||
architecture.
|
||||
|
||||
#. Cinder-volume as a single point of failure
|
||||
|
||||
There are currently concerns over the cinder-volume service ability
|
||||
to run as a fully active-active service. During the Mitaka
|
||||
timeframe, this is being worked on, see:
|
||||
https://blueprints.launchpad.net/cinder/+spec/cinder-volume-active-active-support
|
||||
Thus, cinder-volume will only be running on one of the controller
|
||||
nodes, even if it will be configured on all nodes. In case of a
|
||||
failure in the node running cinder-volume, it should be started in
|
||||
a surviving controller node.
|
||||
|
||||
#. Neutron-lbaas-agent as a single point of failure
|
||||
|
||||
The current design of the neutron LBaaS agent using the HAProxy
|
||||
driver does not allow high availability for the tenant load
|
||||
balancers. The neutron-lbaas-agent service will be enabled and
|
||||
running on all controllers, allowing for load balancers to be
|
||||
distributed across all nodes. However, a controller node failure
|
||||
will stop all load balancers running on that node until the service
|
||||
is recovered or the load balancer is manually removed and created
|
||||
again.
|
||||
|
||||
#. Service monitoring and recovery required
|
||||
|
||||
An external service monitoring infrastructure is required to check
|
||||
the OpenStack service health, and notify operators in case of any
|
||||
failure. This architecture does not provide any facility for that,
|
||||
so it would be necessary to integrate the OpenStack deployment with
|
||||
any existing monitoring environment.
|
||||
|
||||
#. Manual recovery after a full cluster restart
|
||||
|
||||
Some support services used by RDO or RHEL OSP use their own form of
|
||||
application clustering. Usually, these services maintain a cluster
|
||||
quorum, that may be lost in case of a simultaneous restart of all
|
||||
cluster nodes, for example during a power outage. Each service will
|
||||
require its own procedure to regain quorum.
|
||||
|
||||
If you find any or all of these limitations concerning, you are
|
||||
encouraged to refer to the
|
||||
:doc:`Pacemaker HA architecture<intro-ha-arch-pacemaker>` instead.
|
198
doc/ha-guide/source/intro-ha-arch-pacemaker.rst
Normal file
198
doc/ha-guide/source/intro-ha-arch-pacemaker.rst
Normal file
@ -0,0 +1,198 @@
|
||||
==========================
|
||||
The Pacemaker architecture
|
||||
==========================
|
||||
|
||||
What is a cluster manager
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
At its core, a cluster is a distributed finite state machine capable
|
||||
of co-ordinating the startup and recovery of inter-related services
|
||||
across a set of machines.
|
||||
|
||||
Even a distributed and/or replicated application that is able to
|
||||
survive failures on one or more machines can benefit from a
|
||||
cluster manager:
|
||||
|
||||
#. Awareness of other applications in the stack
|
||||
|
||||
While SYS-V init replacements like systemd can provide
|
||||
deterministic recovery of a complex stack of services, the
|
||||
recovery is limited to one machine and lacks the context of what
|
||||
is happening on other machines - context that is crucial to
|
||||
determine the difference between a local failure, clean startup
|
||||
and recovery after a total site failure.
|
||||
|
||||
#. Awareness of instances on other machines
|
||||
|
||||
Services like RabbitMQ and Galera have complicated boot-up
|
||||
sequences that require co-ordination, and often serialization, of
|
||||
startup operations across all machines in the cluster. This is
|
||||
especially true after site-wide failure or shutdown where we must
|
||||
first determine the last machine to be active.
|
||||
|
||||
#. A shared implementation and calculation of `quorum
|
||||
<http://en.wikipedia.org/wiki/Quorum_(Distributed_Systems)>`_.
|
||||
|
||||
It is very important that all members of the system share the same
|
||||
view of who their peers are and whether or not they are in the
|
||||
majority. Failure to do this leads very quickly to an internal
|
||||
`split-brain <http://en.wikipedia.org/wiki/Split-brain_(computing)>`_
|
||||
state - where different parts of the system are pulling in
|
||||
different and incompatible directions.
|
||||
|
||||
#. Data integrity through fencing (a non-responsive process does not
|
||||
imply it is not doing anything)
|
||||
|
||||
A single application does not have sufficient context to know the
|
||||
difference between failure of a machine and failure of the
|
||||
application on a machine. The usual practice is to assume the
|
||||
machine is dead and carry on, however this is highly risky - a
|
||||
rogue process or machine could still be responding to requests and
|
||||
generally causing havoc. The safer approach is to make use of
|
||||
remotely accessible power switches and/or network switches and SAN
|
||||
controllers to fence (isolate) the machine before continuing.
|
||||
|
||||
#. Automated recovery of failed instances
|
||||
|
||||
While the application can still run after the failure of several
|
||||
instances, it may not have sufficient capacity to serve the
|
||||
required volume of requests. A cluster can automatically recover
|
||||
failed instances to prevent additional load induced failures.
|
||||
|
||||
For this reason, the use of a cluster manager like `Pacemaker
|
||||
<http://clusterlabs.org>`_ is highly recommended.
|
||||
|
||||
Deployment flavors
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It is possible to deploy three different flavors of the Pacemaker
|
||||
architecture. The two extremes are **Collapsed** (where every
|
||||
component runs on every node) and **Segregated** (where every
|
||||
component runs in its own 3+ node cluster).
|
||||
|
||||
Regardless of which flavor you choose, it is recommended that the
|
||||
clusters contain at least three nodes so that we can take advantage of
|
||||
`quorum <quorum_>`_.
|
||||
|
||||
Quorum becomes important when a failure causes the cluster to split in
|
||||
two or more partitions. In this situation, you want the majority to
|
||||
ensure the minority are truly dead (through fencing) and continue to
|
||||
host resources. For a two-node cluster, no side has the majority and
|
||||
you can end up in a situation where both sides fence each other, or
|
||||
both sides are running the same services - leading to data corruption.
|
||||
|
||||
Clusters with an even number of hosts suffer from similar issues - a
|
||||
single network failure could easily cause a N:N split where neither
|
||||
side retains a majority. For this reason, we recommend an odd number
|
||||
of cluster members when scaling up.
|
||||
|
||||
You can have up to 16 cluster members (this is currently limited by
|
||||
the ability of corosync to scale higher). In extreme cases, 32 and
|
||||
even up to 64 nodes could be possible, however, this is not well tested.
|
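As an illustrative sketch on a RHEL or CentOS style system using ``pcs``
(the host names are assumptions; other distributions may use ``crmsh`` and
plain corosync configuration files instead), a three-node cluster could be
bootstrapped like this:

.. code-block:: console

   # pcs cluster auth controller1 controller2 controller3
   # pcs cluster setup --name openstack-cluster controller1 controller2 controller3
   # pcs cluster start --all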
||||
|
||||
Collapsed
|
||||
---------
|
||||
|
||||
In this configuration, there is a single cluster of 3 or more
|
||||
nodes on which every component is running.
|
||||
|
||||
This scenario has the advantage of requiring far fewer, if more
|
||||
powerful, machines. Additionally, being part of a single cluster
|
||||
allows us to accurately model the ordering dependencies between
|
||||
components.
|
||||
|
||||
This scenario can be visualized as below.
|
||||
|
||||
.. image:: /figures/Cluster-deployment-collapsed.png
|
||||
:width: 100%
|
||||
|
||||
You would choose this option if you prefer to have fewer but more
|
||||
powerful boxes.
|
||||
|
||||
This is the most common option and the one we document here.
|
||||
|
||||
Segregated
|
||||
----------
|
||||
|
||||
In this configuration, each service runs in a dedicated cluster of
|
||||
3 or more nodes.
|
||||
|
||||
The benefits of this approach are the physical isolation between
|
||||
components and the ability to add capacity to specific components.
|
||||
|
||||
You would choose this option if you prefer to have more but
|
||||
less powerful boxes.
|
||||
|
||||
This scenario can be visualized as below, where each box below
|
||||
represents a cluster of three or more guests.
|
||||
|
||||
.. image:: /figures/Cluster-deployment-segregated.png
|
||||
:width: 100%
|
||||
|
||||
Mixed
|
||||
-----
|
||||
|
||||
It is also possible to follow a segregated approach for one or more
|
||||
components that are expected to be a bottleneck and use a collapsed
|
||||
approach for the remainder.
|
||||
|
||||
|
||||
Proxy server
|
||||
~~~~~~~~~~~~
|
||||
|
||||
Almost all services in this stack benefit from being proxied.
|
||||
Using a proxy server provides:
|
||||
|
||||
#. Load distribution
|
||||
|
||||
Many services can act in an active/active capacity, however, they
|
||||
usually require an external mechanism for distributing requests to
|
||||
one of the available instances. The proxy server can serve this
|
||||
role.
|
||||
|
||||
#. API isolation
|
||||
|
||||
By sending all API access through the proxy, we can clearly
|
||||
identify service interdependencies. We can also move them to
|
||||
locations other than ``localhost`` to increase capacity if the
|
||||
need arises.
|
||||
|
||||
#. Simplified process for adding/removing of nodes
|
||||
|
||||
Since all API access is directed to the proxy, adding or removing
|
||||
nodes has no impact on the configuration of other services. This
|
||||
can be very useful in upgrade scenarios where an entirely new set
|
||||
of machines can be configured and tested in isolation before
|
||||
telling the proxy to direct traffic there instead.
|
||||
|
||||
#. Enhanced failure detection
|
||||
|
||||
The proxy can be configured as a secondary mechanism for detecting
|
||||
service failures. It can even be configured to look for nodes in
|
||||
a degraded state (such as being 'too far' behind in the
|
||||
replication) and take them out of circulation.
|
||||
|
||||
The following components are currently unable to benefit from the use
|
||||
of a proxy server:
|
||||
|
||||
* RabbitMQ
|
||||
* Memcached
|
||||
* MongoDB
|
||||
|
||||
However, the reasons vary and are discussed under each component's
|
||||
heading.
|
||||
|
||||
We recommend HAProxy as the load balancer, however, there are many
|
||||
alternatives in the marketplace.
|
||||
|
||||
We use a check interval of 1 second, however, the timeouts vary by service.
|
||||
|
||||
Generally, we use round-robin to distribute load amongst instances of
|
||||
active/active services, however, Galera uses the ``stick-table`` options
|
||||
to ensure that incoming connections to the virtual IP (VIP) are
directed to only one of the available back ends.
|
||||
|
||||
In Galera's case, although it can run active/active, this helps avoid
|
||||
lock contention and prevent deadlocks. It is used in combination with
|
||||
the ``httpchk`` option, which ensures that only nodes that are in sync with
their peers are allowed to handle requests.
|
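A minimal sketch of such a back-end definition in
:file:`/etc/haproxy/haproxy.cfg` could look as follows. The health-check
port 9200 assumes an external clustercheck-style service and, together with
the host names, is an assumption for illustration:

.. code-block:: none

   listen galera_cluster
       bind 10.0.0.11:3306
       balance source
       option httpchk
       # Pin all traffic for the VIP to a single back end at a time
       stick-table type ip size 1
       stick on dst
       server controller1 10.0.0.12:3306 check port 9200 inter 2000 rise 2 fall 5
       server controller2 10.0.0.13:3306 check port 9200 inter 2000 rise 2 fall 5 backup
       server controller3 10.0.0.14:3306 check port 9200 inter 2000 rise 2 fall 5 backup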
4
doc/ha-guide/source/intro-ha-compute.rst
Normal file
4
doc/ha-guide/source/intro-ha-compute.rst
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
==========================================
|
||||
Overview of highly-available compute nodes
|
||||
==========================================
|
213
doc/ha-guide/source/intro-ha-concepts.rst
Normal file
213
doc/ha-guide/source/intro-ha-concepts.rst
Normal file
@ -0,0 +1,213 @@
|
||||
==========================
|
||||
High availability concepts
|
||||
==========================
|
||||
|
||||
High availability systems seek to minimize two things:
|
||||
|
||||
**System downtime**
|
||||
Occurs when a user-facing service is unavailable
|
||||
beyond a specified maximum amount of time.
|
||||
|
||||
**Data loss**
|
||||
Accidental deletion or destruction of data.
|
||||
|
||||
Most high availability systems guarantee protection against system downtime
|
||||
and data loss only in the event of a single failure.
|
||||
However, they are also expected to protect against cascading failures,
|
||||
where a single failure deteriorates into a series of consequential failures.
|
||||
Many service providers guarantee a :term:`Service Level Agreement (SLA)`
that includes an uptime percentage for the computing service, calculated
from the available time and system downtime, excluding planned outage time.
|
||||
|
||||
Redundancy and failover
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
High availability is implemented with redundant hardware
|
||||
running redundant instances of each service.
|
||||
If one piece of hardware running one instance of a service fails,
|
||||
the system can then failover to use another instance of a service
|
||||
that is running on hardware that did not fail.
|
||||
|
||||
A crucial aspect of high availability
|
||||
is the elimination of single points of failure (SPOFs).
|
||||
A SPOF is an individual piece of equipment or software
|
||||
that causes system downtime or data loss if it fails.
|
||||
In order to eliminate SPOFs, check that mechanisms exist for redundancy of:
|
||||
|
||||
- Network components, such as switches and routers
|
||||
|
||||
- Applications and automatic service migration
|
||||
|
||||
- Storage components
|
||||
|
||||
- Facility services such as power, air conditioning, and fire protection
|
||||
|
||||
In the event that a component fails and a back-up system must take on
|
||||
its load, most high availability systems will replace the failed
|
||||
component as quickly as possible to maintain necessary redundancy. This
|
||||
way time spent in a degraded protection state is minimized.
|
||||
|
||||
Most high availability systems fail in the event of multiple
|
||||
independent (non-consequential) failures. In this case, most
|
||||
implementations favor protecting data over maintaining availability.
|
||||
|
||||
High availability systems typically achieve an uptime percentage of
|
||||
99.99% or more, which roughly equates to less than an hour of
|
||||
cumulative downtime per year. In order to achieve this, high
|
||||
availability systems should keep recovery times after a failure to
|
||||
about one to two minutes, sometimes significantly less.
|
||||
|
||||
OpenStack currently meets such availability requirements for its own
|
||||
infrastructure services, meaning that an uptime of 99.99% is feasible
|
||||
for the OpenStack infrastructure proper. However, OpenStack does not
|
||||
guarantee 99.99% availability for individual guest instances.
|
||||
|
||||
This document discusses some common methods of implementing highly
|
||||
available systems, with an emphasis on the core OpenStack services and
|
||||
other open source services that are closely aligned with OpenStack.
|
||||
These methods are by no means the only ways to do it;
|
||||
you may supplement these services with commercial hardware and software
|
||||
that provides additional features and functionality.
|
||||
You also need to address high availability concerns
|
||||
for any applications software that you run on your OpenStack environment.
|
||||
The important thing is to make sure that your services are redundant
|
||||
and available; how you achieve that is up to you.
|
||||
|
||||
Stateless vs. stateful services
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Preventing single points of failure can depend on whether or not a
|
||||
service is stateless.
|
||||
|
||||
Stateless service
|
||||
A service that provides a response after your request
|
||||
and then requires no further attention.
|
||||
To make a stateless service highly available,
|
||||
you need to provide redundant instances and load balance them.
|
||||
OpenStack services that are stateless include ``nova-api``,
|
||||
``nova-conductor``, ``glance-api``, ``keystone-api``,
|
||||
``neutron-api`` and ``nova-scheduler``.
|
||||
|
||||
Stateful service
|
||||
A service where subsequent requests to the service
|
||||
depend on the results of the first request.
|
||||
Stateful services are more difficult to manage because a single
|
||||
action typically involves more than one request, so simply providing
|
||||
additional instances and load balancing does not solve the problem.
|
||||
For example, if the horizon user interface reset itself every time
|
||||
you went to a new page, it would not be very useful.
|
||||
OpenStack services that are stateful include the OpenStack database
|
||||
and message queue.
|
||||
Making stateful services highly available can depend on whether you choose
|
||||
an active/passive or active/active configuration.
|
||||
|
||||
Active/Passive vs. Active/Active
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Stateful services may be configured as active/passive or active/active:
|
||||
|
||||
:term:`active/passive configuration`
|
||||
Maintains a redundant instance
|
||||
that can be brought online when the active service fails.
|
||||
For example, OpenStack writes to the main database
|
||||
while maintaining a disaster recovery database that can be brought online
|
||||
if the main database fails.
|
||||
|
||||
A typical active/passive installation for a stateful service maintains
|
||||
a replacement resource that can be brought online when required.
|
||||
Requests are handled using a :term:`virtual IP` address (VIP) that
|
||||
facilitates returning to service with minimal reconfiguration.
|
||||
A separate application (such as Pacemaker or Corosync) monitors
|
||||
these services, bringing the backup online as necessary.
|
||||
|
||||
:term:`active/active configuration`
|
||||
Each service also has a backup but manages both the main and
|
||||
redundant systems concurrently.
|
||||
This way, if there is a failure, the user is unlikely to notice.
|
||||
The backup system is already online and takes on increased load
|
||||
while the main system is fixed and brought back online.
|
||||
|
||||
Typically, an active/active installation for a stateless service
|
||||
maintains a redundant instance, and requests are load balanced using
|
||||
a virtual IP address and a load balancer such as HAProxy.
|
||||
|
||||
A typical active/active installation for a stateful service includes
|
||||
redundant services, with all instances having an identical state. In
|
||||
other words, updates to one instance of a database update all other
|
||||
instances. This way a request to one instance is the same as a
|
||||
request to any other. A load balancer manages the traffic to these
|
||||
systems, ensuring that operational systems always handle the
|
||||
request.
|
||||
|
||||
Clusters and quorums
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The quorum specifies the minimal number of nodes
|
||||
that must be functional in a cluster of redundant nodes
|
||||
in order for the cluster to remain functional.
|
||||
When one node fails and failover transfers control to other nodes,
|
||||
the system must ensure that data and processes remain sane.
|
||||
To determine this, the contents of the remaining nodes are compared
|
||||
and, if there are discrepancies, a "majority rules" algorithm is implemented.
|
||||
|
||||
For this reason, each cluster in a high availability environment should
|
||||
have an odd number of nodes and the quorum is defined as more than a half
|
||||
of the nodes.
|
||||
If multiple nodes fail so that the cluster size falls below the quorum
|
||||
value, the cluster itself fails.
|
||||
|
||||
For example, in a seven-node cluster, the quorum should be set to
|
||||
floor(7/2) + 1 == 4. If quorum is four and four nodes fail simultaneously,
|
||||
the cluster itself would fail, whereas it would continue to function if
no more than three nodes fail. If split into partitions of three and four nodes
|
||||
respectively, the quorum of four nodes would continue to operate the majority
|
||||
partition and stop or fence the minority one (depending on the
|
||||
no-quorum-policy cluster configuration).
|
||||
|
||||
As a configuration example, the quorum could also have been set to three.

.. note::

   Setting the quorum to a value less than floor(n/2) + 1 is not
   recommended and would likely cause a split-brain in the face of network
   partitions.

   With a quorum of three in the example above, the cluster would also
   continue to function when four nodes fail simultaneously. However, if it
   split into partitions of three and four nodes respectively, a quorum of
   three would make both sides attempt to fence the other and host
   resources. Without fencing enabled, it would go straight to running two
   copies of each resource.

   This is why setting the quorum to a value less than floor(n/2) + 1 is
   dangerous. However, it may be required for some specific cases, such as a
   temporary measure at a point when it is known with 100% certainty that
   the other nodes are down.
|
||||
|
||||
When configuring an OpenStack environment for study or demonstration purposes,
|
||||
it is possible to turn off the quorum checking;
|
||||
this is discussed later in this guide.
|
||||
Production systems should always run with quorum enabled.
|
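As an illustrative sketch (assuming a Corosync/Pacemaker cluster; the exact
tools depend on your distribution), you can inspect the current quorum state
and relax the quorum policy for a test environment as follows:

.. code-block:: console

   # corosync-quorumtool -s
   # pcs property set no-quorum-policy=ignore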
||||
|
||||
|
||||
Single-controller high availability mode
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
OpenStack supports a single-controller high availability mode
|
||||
that is managed by the services that manage highly available environments
|
||||
but is not actually highly available because
|
||||
no redundant controllers are configured to use for failover.
|
||||
This environment can be used for study and demonstration
|
||||
but is not appropriate for a production environment.
|
||||
|
||||
It is possible to add controllers to such an environment
|
||||
to convert it into a truly highly available environment.
|
||||
|
||||
|
||||
High availability is not for every user; it presents some challenges.
It may be too complex for databases or systems with large amounts of data,
and replication can slow large systems down. Different setups have different
prerequisites, so read the guidelines for each setup.

High availability is turned off by default in OpenStack setups.
|
62
doc/ha-guide/source/intro-ha-controller.rst
Normal file
62
doc/ha-guide/source/intro-ha-controller.rst
Normal file
@ -0,0 +1,62 @@
|
||||
========================================
|
||||
Overview of highly-available controllers
|
||||
========================================
|
||||
|
||||
OpenStack is a set of services exposed to the end users
as HTTP(S) APIs. Additionally, for its own internal usage, OpenStack
requires an SQL database server and an AMQP broker. The physical servers
where all the components are running are often called controllers.
This modular OpenStack architecture allows you to duplicate all the
components and run them on different controllers.
By making all the components redundant, it is possible to make
OpenStack highly available.

In general, we can divide all the OpenStack components into three categories:

- OpenStack APIs: these are HTTP(S) stateless services written in Python,
  easy to duplicate and mostly easy to load balance.

- The SQL relational database server provides stateful storage that is
  consumed by other components. Supported databases are MySQL, MariaDB, and
  PostgreSQL. Making the SQL database redundant is complex.

- :term:`Advanced Message Queuing Protocol (AMQP)` provides OpenStack's
  internal stateful communication service.
|
||||
|
||||
Network components
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
[TODO Need discussion of network hardware, bonding interfaces,
|
||||
intelligent Layer 2 switches, routers and Layer 3 switches.]
|
||||
|
||||
The configuration uses static routing without
|
||||
Virtual Router Redundancy Protocol (VRRP)
|
||||
or similar techniques implemented.
|
||||
|
||||
[TODO Need description of VIP failover inside Linux namespaces
|
||||
and expected SLA.]
|
||||
|
||||
See [TODO link] for more information about configuring networking
|
||||
for high availability.
|
||||
|
||||
Common deployment architectures
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are primarily two HA architectures in use today.
|
||||
|
||||
One uses a cluster manager such as Pacemaker or Veritas to co-ordinate
|
||||
the actions of the various services across a set of machines. Since
|
||||
we are focused on FOSS, we will refer to this as the Pacemaker
|
||||
architecture.
|
||||
|
||||
The other is optimized for Active/Active services that do not require
|
||||
any inter-machine coordination. In this setup, services are started by
|
||||
your init system (systemd in most modern distributions) and a tool is
|
||||
used to move IP addresses between the hosts. The most common package
|
||||
for doing this is keepalived.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
intro-ha-arch-pacemaker.rst
|
||||
intro-ha-arch-keepalived.rst
|
4
doc/ha-guide/source/intro-ha-other.rst
Normal file
4
doc/ha-guide/source/intro-ha-other.rst
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
======================================
|
||||
High availability for other components
|
||||
======================================
|
12
doc/ha-guide/source/intro-ha-storage.rst
Normal file
12
doc/ha-guide/source/intro-ha-storage.rst
Normal file
@ -0,0 +1,12 @@
|
||||
=====================================
|
||||
Overview of high availability storage
|
||||
=====================================
|
||||
|
||||
Making the Block Storage (cinder) API service highly available in
|
||||
active/passive mode involves:
|
||||
|
||||
* Configuring Block Storage to listen on the VIP address
|
||||
|
||||
* Managing the Block Storage API daemon with the Pacemaker cluster manager
|
||||
|
||||
* Configuring OpenStack services to use this IP address
|
15
doc/ha-guide/source/intro-ha.rst
Normal file
15
doc/ha-guide/source/intro-ha.rst
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
===========================================
|
||||
Introduction to OpenStack high availability
|
||||
===========================================
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
intro-ha-concepts.rst
|
||||
intro-ha-controller.rst
|
||||
intro-ha-storage.rst
|
||||
intro-ha-compute.rst
|
||||
intro-ha-other.rst
|
||||
|
4261
doc/ha-guide/source/locale/ha-guide.pot
Normal file
4261
doc/ha-guide/source/locale/ha-guide.pot
Normal file
File diff suppressed because it is too large
4398
doc/ha-guide/source/locale/ja/LC_MESSAGES/ha-guide.po
Normal file
4398
doc/ha-guide/source/locale/ja/LC_MESSAGES/ha-guide.po
Normal file
File diff suppressed because it is too large
17
doc/ha-guide/source/networking-ha-dhcp.rst
Normal file
17
doc/ha-guide/source/networking-ha-dhcp.rst
Normal file
@ -0,0 +1,17 @@
|
||||
|
||||
.. _dhcp-agent:
|
||||
|
||||
======================
|
||||
Run neutron DHCP agent
|
||||
======================
|
||||
|
||||
The OpenStack Networking service has a scheduler
|
||||
that lets you run multiple agents across nodes;
|
||||
the DHCP agent can be natively highly available.
|
||||
To configure the number of DHCP agents per network,
|
||||
modify the ``dhcp_agents_per_network`` parameter
|
||||
in the :file:`/etc/neutron/neutron.conf` file.
|
||||
By default this is set to 1.
|
||||
To achieve high availability,
|
||||
assign more than one DHCP agent per network.
|
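For example, the following setting in :file:`/etc/neutron/neutron.conf`
runs two DHCP agents for every network (the value 2 is illustrative):

.. code-block:: ini

   [DEFAULT]
   dhcp_agents_per_network = 2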
||||
|
37
doc/ha-guide/source/networking-ha-l3.rst
Normal file
37
doc/ha-guide/source/networking-ha-l3.rst
Normal file
@ -0,0 +1,37 @@
|
||||
|
||||
.. _neutron-l3:
|
||||
|
||||
====================
|
||||
Run neutron L3 agent
|
||||
====================
|
||||
|
||||
The neutron L3 agent is scalable, due to the scheduler that supports
|
||||
Virtual Router Redundancy Protocol (VRRP)
|
||||
to distribute virtual routers across multiple nodes.
|
||||
To enable high availability for configured routers,
|
||||
edit the :file:`/etc/neutron/neutron.conf` file
|
||||
to set the following values:
|
||||
|
||||
.. list-table:: /etc/neutron/neutron.conf parameters for high availability
|
||||
:widths: 15 10 30
|
||||
:header-rows: 1
|
||||
|
||||
* - Parameter
|
||||
- Value
|
||||
- Description
|
||||
* - l3_ha
|
||||
- True
|
||||
- All routers are highly available by default.
|
||||
* - allow_automatic_l3agent_failover
|
||||
- True
|
||||
- Set automatic L3 agent failover for routers
|
||||
* - max_l3_agents_per_router
|
||||
- 2 or more
|
||||
- Maximum number of network nodes to use for the HA router.
|
||||
* - min_l3_agents_per_router
|
||||
- 2 or more
|
||||
- Minimum number of network nodes to use for the HA router.
|
||||
A new router can be created only if this number
|
||||
of network nodes are available.
|
||||
|
||||
|
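As an example, the resulting ``[DEFAULT]`` settings in
:file:`/etc/neutron/neutron.conf` could look as follows (the agent counts
shown are illustrative):

.. code-block:: ini

   [DEFAULT]
   l3_ha = True
   allow_automatic_l3agent_failover = True
   max_l3_agents_per_router = 3
   min_l3_agents_per_router = 2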
17
doc/ha-guide/source/networking-ha-lbaas.rst
Normal file
17
doc/ha-guide/source/networking-ha-lbaas.rst
Normal file
@ -0,0 +1,17 @@
|
||||
|
||||
.. _neutron-lbaas:
|
||||
|
||||
=======================
|
||||
Run neutron LBaaS agent
|
||||
=======================
|
||||
|
||||
Currently, no native feature is provided
|
||||
to make the LBaaS agent highly available
|
||||
using the default plug-in HAProxy.
|
||||
A common way to make HAProxy highly available
|
||||
is to use the VRRP (Virtual Router Redundancy Protocol).
|
||||
Unfortunately, this is not yet implemented
|
||||
in the LBaaS HAProxy plug-in.
|
||||
|
||||
[TODO: update this section.]
|
||||
|
18
doc/ha-guide/source/networking-ha-metadata.rst
Normal file
18
doc/ha-guide/source/networking-ha-metadata.rst
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
.. _neutron-metadata:
|
||||
|
||||
==========================
|
||||
Run neutron metadata agent
|
||||
==========================
|
||||
|
||||
No native feature is available
|
||||
to make this service highly available.
|
||||
At this time, the Active/Passive solution exists
|
||||
to run the neutron metadata agent
|
||||
in failover mode with Pacemaker.
|
||||
|
||||
[TODO: Update this information.
|
||||
Can this service now be made HA in active/active mode
|
||||
or do we need to pull in the instructions
|
||||
to run this service in active/passive mode?]
|
||||
|
60
doc/ha-guide/source/networking-ha.rst
Normal file
60
doc/ha-guide/source/networking-ha.rst
Normal file
@ -0,0 +1,60 @@
|
||||
|
||||
=======================
|
||||
OpenStack network nodes
|
||||
=======================
|
||||
|
||||
Configure networking on each node.
|
||||
The
|
||||
`Networking <http://docs.openstack.org/liberty/install-guide-ubuntu/environment-networking.html>`_
|
||||
section of the *Install Guide* includes basic information
|
||||
about configuring networking.
|
||||
|
||||
Notes from planning outline:
|
||||
|
||||
- Rather than configuring neutron here,
|
||||
we should simply mention physical network HA methods
|
||||
such as bonding and additional node/network requirements
|
||||
for L3HA and DVR for planning purposes.
|
||||
- Neutron agents should be described for active/active;
  deprecate the single-agent-instance case.
|
||||
- For Kilo and beyond, focus on L3HA and DVR.
|
||||
- Link to `Networking Guide <http://docs.openstack.org/networking-guide/>`_
|
||||
for configuration details.
|
||||
|
||||
[TODO: Verify that the active/passive
|
||||
network configuration information from
|
||||
`<http://docs.openstack.org/high-availability-guide/content/s-neutron-server.html>`_
|
||||
should not be included here.
|
||||
|
||||
`LP1328922 <https://bugs.launchpad.net/openstack-manuals/+bug/1328922>`_
|
||||
and
|
||||
`LP1349398 <https://bugs.launchpad.net/openstack-manuals/+bug/1349398>`_
|
||||
are related.]
|
||||
|
||||
OpenStack network nodes contain:
|
||||
|
||||
- :ref:`Neutron DHCP agent<dhcp-agent>`
|
||||
- Neutron L2 agent.
|
||||
Note that the L2 agent cannot be distributed and highly available.
|
||||
Instead, it must be installed on each data forwarding node
|
||||
to control the virtual network drivers
|
||||
such as Open vSwitch or Linux Bridge.
|
||||
One L2 agent runs per node and controls its virtual interfaces.
|
||||
- :ref:`Neutron L3 agent<neutron-l3>`
|
||||
- :ref:`Neutron metadata agent<neutron-metadata>`
|
||||
- :ref:`Neutron LBaaS<neutron-lbaas>` (Load Balancing as a Service) agent
|
||||
|
||||
.. note::
|
||||
|
||||
For Liberty, we do not have the standalone network nodes in general.
|
||||
We usually run the Networking services on the controller nodes.
|
||||
In this guide, we use the term "network nodes" for convenience.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
networking-ha-dhcp.rst
|
||||
networking-ha-l3.rst
|
||||
networking-ha-metadata.rst
|
||||
networking-ha-lbaas.rst
|
||||
|
4
doc/ha-guide/source/noncore-ha.rst
Normal file
4
doc/ha-guide/source/noncore-ha.rst
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
=====================================================
|
||||
Configuring non-core components for high availability
|
||||
=====================================================
|
85
doc/ha-guide/source/storage-ha-backend.rst
Normal file
85
doc/ha-guide/source/storage-ha-backend.rst
Normal file
@ -0,0 +1,85 @@
|
||||
|
||||
.. _storage-ha-backend:
|
||||
|
||||
================
|
||||
Storage back end
|
||||
================
|
||||
|
||||
Most of this guide concerns the control plane of high availability:
|
||||
ensuring that services continue to run even if a component fails.
|
||||
Ensuring that data is not lost
|
||||
is the data plane component of high availability;
|
||||
this is discussed here.
|
||||
|
||||
An OpenStack environment includes multiple data pools for the VMs:
|
||||
|
||||
- Ephemeral storage is allocated for an instance
|
||||
and is deleted when the instance is deleted.
|
||||
The Compute service manages ephemeral storage.
|
||||
By default, Compute stores ephemeral drives as files
|
||||
on local disks on the Compute node
|
||||
but Ceph RBD can instead be used
|
||||
as the storage back end for ephemeral storage.
|
||||
|
||||
- Persistent storage exists outside all instances.
|
||||
Two types of persistent storage are provided:
|
||||
|
||||
- Block Storage service (cinder)
|
||||
can use LVM or Ceph RBD as the storage back end.
|
||||
- Image service (glance)
|
||||
can use the Object Storage service (swift)
|
||||
or Ceph RBD as the storage back end.
|
||||
|
||||
For more information about configuring storage back ends for
|
||||
the different storage options, see the `Administrator Guide
|
||||
<http://docs.openstack.org/admin-guide/>`_.
|
||||
|
||||
This section discusses ways to protect against
|
||||
data loss in your OpenStack environment.
|
||||
|
||||
RAID drives
|
||||
-----------
|
||||
|
||||
Configuring RAID on the hard drives that implement storage
|
||||
protects your data against a hard drive failure.
|
||||
If, however, the node itself fails, data may be lost.
|
||||
In particular, all volumes stored on an LVM node can be lost.
|
||||
|
||||
Ceph
|
||||
----
|
||||
|
||||
`Ceph RBD <http://ceph.com/>`_
|
||||
is an innately high availability storage back end.
|
||||
It creates a storage cluster with multiple nodes
|
||||
that communicate with each other
|
||||
to replicate and redistribute data dynamically.
|
||||
A Ceph RBD storage cluster provides
|
||||
a single shared set of storage nodes
|
||||
that can handle all classes of persistent and ephemeral data
|
||||
-- glance, cinder, and nova --
|
||||
that are required for OpenStack instances.
|
||||
|
||||
Ceph RBD provides object replication capabilities
|
||||
by storing Block Storage volumes as Ceph RBD objects;
|
||||
Ceph RBD ensures that each replica of an object
|
||||
is stored on a different node.
|
||||
This means that your volumes are protected against
|
||||
hard drive and node failures
|
||||
or even the failure of the data center itself.
|
||||
|
||||
When Ceph RBD is used for ephemeral volumes
|
||||
as well as block and image storage, it supports
|
||||
`live migration
|
||||
<http://docs.openstack.org/admin-guide/compute-live-migration-usage.html>`_
|
||||
of VMs with ephemeral drives;
|
||||
LVM only supports live migration of volume-backed VMs.
|
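As a brief illustration of what using Ceph RBD as a Block Storage back end
involves (the pool, user, and back-end names are assumptions; see the
Administrator Guide for the authoritative configuration), the relevant part
of :file:`/etc/cinder/cinder.conf` could look like this:

.. code-block:: ini

   [DEFAULT]
   enabled_backends = ceph-rbd

   [ceph-rbd]
   volume_driver = cinder.volume.drivers.rbd.RBDDriver
   rbd_pool = volumes
   rbd_ceph_conf = /etc/ceph/ceph.conf
   rbd_user = cinder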
||||
|
||||
Remote backup facilities
|
||||
------------------------
|
||||
|
||||
[TODO: Add discussion of remote backup facilities
|
||||
as an alternate way to secure ones data.
|
||||
Include brief mention of key third-party technologies
|
||||
with links to their documentation]
|
||||
|
||||
|
238
doc/ha-guide/source/storage-ha-cinder.rst
Normal file
238
doc/ha-guide/source/storage-ha-cinder.rst
Normal file
@ -0,0 +1,238 @@
|
||||
.. highlight: ini
|
||||
:linenothreshold: 5
|
||||
|
||||
==================================
|
||||
Highly available Block Storage API
|
||||
==================================
|
||||
|
||||
Cinder provides 'block storage as a service' suitable for performance
|
||||
sensitive scenarios such as databases, expandable file systems, or
|
||||
providing a server with access to raw block level storage.
|
||||
|
||||
Persistent block storage can survive instance termination and can also
|
||||
be moved across instances like any external storage device. Cinder
|
||||
also has volume snapshots capability for backing up the volumes.
|
||||
|
||||
Making this Block Storage API service highly available in
|
||||
active/passive mode involves:
|
||||
|
||||
- :ref:`ha-cinder-pacemaker`
|
||||
- :ref:`ha-cinder-configure`
|
||||
- :ref:`ha-cinder-services`
|
||||
|
||||
In theory, you can run the Block Storage service as active/active.
|
||||
However, because of outstanding concerns, it is recommended that you run
the volume component as active/passive only.
|
||||
|
||||
Jon Bernard writes:
|
||||
|
||||
::
|
||||
|
||||
  Requests are first seen by Cinder in the API service, and we have a
  fundamental problem there - a standard test-and-set race condition
  exists for many operations where the volume status is first checked
  for an expected status and then (in a different operation) updated to
  a pending status. The pending status indicates to other incoming
  requests that the volume is undergoing a current operation, however it
  is possible for two simultaneous requests to race here, which yields
  undefined results.

  Later, the manager/driver will receive the message and carry out the
  operation. At this stage there is a question of the synchronization
  techniques employed by the drivers and what guarantees they make.

  If cinder-volume processes exist as different processes, then the
  'synchronized' decorator from the lockutils package will not be
  sufficient. In this case the programmer can pass an argument to
  synchronized() 'external=True'. If external is enabled, then the
  locking will take place on a file located on the filesystem. By
  default, this file is placed in Cinder's 'state directory' in
  /var/lib/cinder so it won't be visible to cinder-volume instances
  running on different machines.

  However, the location for file locking is configurable. So an
  operator could configure the state directory to reside on shared
  storage. If the shared storage in use implements unix file locking
  semantics, then this could provide the requisite synchronization
  needed for an active/active HA configuration.

  The remaining issue is that not all drivers use the synchronization
  methods, and even fewer of those use the external file locks. A
  sub-concern would be whether they use them correctly.
|
||||
|
||||
You can read more about these concerns on the
|
||||
`Red Hat Bugzilla <https://bugzilla.redhat.com/show_bug.cgi?id=1193229>`_
|
||||
and there is a
|
||||
`pseudo roadmap <https://etherpad.openstack.org/p/cinder-kilo-stabilisation-work>`_
|
||||
for addressing them upstream.
|
||||
|
||||
|
||||
.. _ha-cinder-pacemaker:
|
||||
|
||||
Add Block Storage API resource to Pacemaker
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
On RHEL-based systems, you should create resources for cinder's
|
||||
systemd agents and create constraints to enforce startup/shutdown
|
||||
ordering:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
pcs resource create openstack-cinder-api systemd:openstack-cinder-api --clone interleave=true
|
||||
pcs resource create openstack-cinder-scheduler systemd:openstack-cinder-scheduler --clone interleave=true
|
||||
pcs resource create openstack-cinder-volume systemd:openstack-cinder-volume
|
||||
|
||||
pcs constraint order start openstack-cinder-api-clone then openstack-cinder-scheduler-clone
|
||||
pcs constraint colocation add openstack-cinder-scheduler-clone with openstack-cinder-api-clone
|
||||
pcs constraint order start openstack-cinder-scheduler-clone then openstack-cinder-volume
|
||||
pcs constraint colocation add openstack-cinder-volume with openstack-cinder-scheduler-clone
|
||||
|
||||
|
||||
If the Block Storage service runs on the same nodes as the other services,
|
||||
then it is advisable to also include:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
pcs constraint order start openstack-keystone-clone then openstack-cinder-api-clone
|
||||
|
||||
Alternatively, instead of using systemd agents, download and
|
||||
install the OCF resource agent:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# cd /usr/lib/ocf/resource.d/openstack
|
||||
# wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/cinder-api
|
||||
# chmod a+rx *
|
||||
|
||||
You can now add the Pacemaker configuration for Block Storage API resource.
|
||||
Connect to the Pacemaker cluster with the :command:`crm configure` command
|
||||
and add the following cluster resources:
|
||||
|
||||
::
|
||||
|
||||
  primitive p_cinder-api ocf:openstack:cinder-api \
    params config="/etc/cinder/cinder.conf" \
    os_password="secretsecret" \
    os_username="admin" \
    os_tenant_name="admin" \
    keystone_get_token_url="http://10.0.0.11:5000/v2.0/tokens" \
    op monitor interval="30s" timeout="30s"
|
||||
|
||||
This configuration creates ``p_cinder-api``,
|
||||
a resource for managing the Block Storage API service.
|
||||
|
||||
The command :command:`crm configure` supports batch input,
|
||||
so you may copy and paste the lines above
|
||||
into your live pacemaker configuration and then make changes as required.
|
||||
For example, you may enter ``edit p_ip_cinder-api``
|
||||
from the :command:`crm configure` menu
|
||||
and edit the resource to match your preferred virtual IP address.
|
||||
|
||||
Once completed, commit your configuration changes
|
||||
by entering :command:`commit` from the :command:`crm configure` menu.
|
||||
Pacemaker then starts the Block Storage API service
|
||||
and its dependent resources on one of your nodes.
|
||||
|
||||
.. _ha-cinder-configure:
|
||||
|
||||
Configure Block Storage API service
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Edit the ``/etc/cinder/cinder.conf`` file:
|
||||
|
||||
On a RHEL-based system, it should look something like:
|
||||
|
||||
.. code-block:: ini
|
||||
:linenos:
|
||||
|
||||
[DEFAULT]
|
||||
# This is the name which we should advertise ourselves as and for
|
||||
# A/P installations it should be the same everywhere
|
||||
host = cinder-cluster-1
|
||||
|
||||
# Listen on the Block Storage VIP
|
||||
osapi_volume_listen = 10.0.0.11
|
||||
|
||||
auth_strategy = keystone
|
||||
control_exchange = cinder
|
||||
|
||||
volume_driver = cinder.volume.drivers.nfs.NfsDriver
|
||||
nfs_shares_config = /etc/cinder/nfs_exports
|
||||
nfs_sparsed_volumes = true
|
||||
nfs_mount_options = v3
|
||||
|
||||
[database]
|
||||
sql_connection = mysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
|
||||
max_retries = -1
|
||||
|
||||
[keystone_authtoken]
|
||||
# 10.0.0.11 is the Keystone VIP
|
||||
identity_uri = http://10.0.0.11:35357/
|
||||
auth_uri = http://10.0.0.11:5000/
|
||||
admin_tenant_name = service
|
||||
admin_user = cinder
|
||||
admin_password = CINDER_PASS
|
||||
|
||||
[oslo_messaging_rabbit]
|
||||
# Explicitly list the rabbit hosts as it doesn't play well with HAProxy
|
||||
rabbit_hosts = 10.0.0.12,10.0.0.13,10.0.0.14
|
||||
# As a consequence, we also need HA queues
|
||||
rabbit_ha_queues = True
|
||||
heartbeat_timeout_threshold = 60
|
||||
heartbeat_rate = 2
|
||||
|
||||
Replace ``CINDER_DBPASS`` with the password you chose for the Block Storage
|
||||
database. Replace ``CINDER_PASS`` with the password you chose for the
|
||||
``cinder`` user in the Identity service.
|
||||
|
||||
This example assumes that you are using NFS for the physical storage, which
|
||||
will almost never be true in a production installation.
|
||||
|
||||
If you are using the Block Storage service OCF agent, some settings will
|
||||
be filled in for you, resulting in a shorter configuration file:
|
||||
|
||||
.. code-block:: ini
|
||||
:linenos:
|
||||
|
||||
# We have to use MySQL connection to store data:
|
||||
sql_connection = mysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
|
||||
# Alternatively, you can switch to pymysql,
|
||||
# a new Python 3 compatible library and use
|
||||
# sql_connection = mysql+pymysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
|
||||
# and be ready when everything moves to Python 3.
|
||||
# Ref: https://wiki.openstack.org/wiki/PyMySQL_evaluation
|
||||
|
||||
# We bind Block Storage API to the VIP:
|
||||
osapi_volume_listen = 10.0.0.11
|
||||
|
||||
# We send notifications to High Available RabbitMQ:
|
||||
notifier_strategy = rabbit
|
||||
rabbit_host = 10.0.0.11
|
||||
|
||||
Replace ``CINDER_DBPASS`` with the password you chose for the Block Storage
|
||||
database.
|
||||
|
||||
.. _ha-cinder-services:
|
||||
|
||||
Configure OpenStack services to use highly available Block Storage API
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Your OpenStack services must now point their
|
||||
Block Storage API configuration to the highly available,
|
||||
virtual cluster IP address
|
||||
rather than a Block Storage API server’s physical IP address
|
||||
as you would for a non-HA environment.
|
||||
|
||||
You must create the Block Storage API endpoint with this IP.
|
||||
|
||||
If you are using both private and public IP addresses,
|
||||
you should create two virtual IPs and define your endpoint like this:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ keystone endpoint-create --region $KEYSTONE_REGION \
|
||||
--service-id $service-id \
|
||||
--publicurl 'http://PUBLIC_VIP:8776/v1/%(tenant_id)s' \
|
||||
--adminurl 'http://10.0.0.11:8776/v1/%(tenant_id)s' \
|
||||
--internalurl 'http://10.0.0.11:8776/v1/%(tenant_id)s'
|
||||
|
130
doc/ha-guide/source/storage-ha-glance.rst
Normal file
130
doc/ha-guide/source/storage-ha-glance.rst
Normal file
@ -0,0 +1,130 @@
|
||||
====================================
|
||||
Highly available OpenStack Image API
|
||||
====================================
|
||||
|
||||
The OpenStack Image service offers a service for discovering,
|
||||
registering, and retrieving virtual machine images.
|
||||
To make the OpenStack Image API service highly available
|
||||
in active / passive mode, you must:
|
||||
|
||||
- :ref:`glance-api-pacemaker`
|
||||
- :ref:`glance-api-configure`
|
||||
- :ref:`glance-services`
|
||||
|
||||
This section assumes that you are familiar with the
|
||||
`documentation
|
||||
<http://docs.openstack.org/liberty/install-guide-ubuntu/glance.html>`_
|
||||
for installing the OpenStack Image API service.
|
||||
|
||||
.. _glance-api-pacemaker:
|
||||
|
||||
Add OpenStack Image API resource to Pacemaker
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
You must first download the resource agent to your system:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# cd /usr/lib/ocf/resource.d/openstack
|
||||
# wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/glance-api
|
||||
# chmod a+rx *
|
||||
|
||||
You can now add the Pacemaker configuration
|
||||
for the OpenStack Image API resource.
|
||||
Use the :command:`crm configure` command
|
||||
to connect to the Pacemaker cluster
|
||||
and add the following cluster resources:
|
||||
|
||||
::
|
||||
|
||||
primitive p_glance-api ocf:openstack:glance-api \
|
||||
params config="/etc/glance/glance-api.conf" \
|
||||
os_password="secretsecret" \
|
||||
os_username="admin" os_tenant_name="admin" \
|
||||
os_auth_url="http://10.0.0.11:5000/v2.0/" \
|
||||
op monitor interval="30s" timeout="30s"
|
||||
|
||||
This configuration creates ``p_glance-api``,
|
||||
a resource for managing the OpenStack Image API service.
|
||||
|
||||
The :command:`crm configure` command supports batch input,
|
||||
so you may copy and paste the above into your live Pacemaker configuration
|
||||
and then make changes as required.
|
||||
For example, you may enter ``edit p_ip_glance-api``
|
||||
from the :command:`crm configure` menu
|
||||
and edit the resource to match your preferred virtual IP address.
|
||||
|
||||
After completing these steps,
|
||||
commit your configuration changes by entering :command:`commit`
|
||||
from the :command:`crm configure` menu.
|
||||
Pacemaker then starts the OpenStack Image API service
|
||||
and its dependent resources on one of your nodes.
|
||||
|
||||
.. _glance-api-configure:
|
||||
|
||||
Configure OpenStack Image service API
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Edit the :file:`/etc/glance/glance-api.conf` file
|
||||
to configure the OpenStack image service:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
# We have to use MySQL connection to store data:
|
||||
sql_connection=mysql://glance:password@10.0.0.11/glance
|
||||
# Alternatively, you can switch to pymysql,
|
||||
# a new Python 3 compatible library and use
|
||||
# sql_connection=mysql+pymysql://glance:password@10.0.0.11/glance
|
||||
# and be ready when everything moves to Python 3.
|
||||
# Ref: https://wiki.openstack.org/wiki/PyMySQL_evaluation
|
||||
|
||||
# We bind OpenStack Image API to the VIP:
|
||||
bind_host = 10.0.0.11
|
||||
|
||||
# Connect to OpenStack Image registry service:
|
||||
registry_host = 10.0.0.11
|
||||
|
||||
# We send notifications to High Available RabbitMQ:
|
||||
notifier_strategy = rabbit
|
||||
rabbit_host = 10.0.0.11
|
||||
|
||||
[TODO: need more discussion of these parameters]
|
||||
|
||||
.. _glance-services:
|
||||
|
||||
Configure OpenStack services to use highly available OpenStack Image API
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Your OpenStack services must now point
|
||||
their OpenStack Image API configuration to the highly available,
|
||||
virtual cluster IP address
|
||||
instead of pointing to the physical IP address
|
||||
of an OpenStack Image API server
|
||||
as you would in a non-HA cluster.
|
||||
|
||||
For OpenStack Compute, for example,
|
||||
if your OpenStack Image API service IP address is 10.0.0.11
|
||||
(as in the configuration explained here),
|
||||
you would use the following configuration in your :file:`nova.conf` file:
|
||||
|
||||
.. code-block:: ini
|
||||
|
||||
[glance]
|
||||
...
|
||||
api_servers = 10.0.0.11
|
||||
...
|
||||
|
||||
|
||||
You must also create the OpenStack Image API endpoint with this IP address.
|
||||
If you are using both private and public IP addresses,
|
||||
you should create two virtual IP addresses
|
||||
and define your endpoint like this:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ keystone endpoint-create --region $KEYSTONE_REGION \
|
||||
--service-id $service-id --publicurl 'http://PUBLIC_VIP:9292' \
|
||||
--adminurl 'http://10.0.0.11:9292' \
|
||||
--internalurl 'http://10.0.0.11:9292'
|
||||
|
||||
|
101
doc/ha-guide/source/storage-ha-manila.rst
Normal file
101
doc/ha-guide/source/storage-ha-manila.rst
Normal file
@ -0,0 +1,101 @@
|
||||
.. highlight: ini
|
||||
:linenothreshold: 5
|
||||
|
||||
========================================
|
||||
Highly available Shared File Systems API
|
||||
========================================
|
||||
|
||||
Making the Shared File Systems (manila) API service highly available
|
||||
in active/passive mode involves:
|
||||
|
||||
- :ref:`ha-manila-pacemaker`
|
||||
- :ref:`ha-manila-configure`
|
||||
- :ref:`ha-manila-services`
|
||||
|
||||
.. _ha-manila-pacemaker:
|
||||
|
||||
Add Shared File Systems API resource to Pacemaker
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
You must first download the resource agent to your system:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
# cd /usr/lib/ocf/resource.d/openstack
|
||||
# wget https://git.openstack.org/cgit/openstack/openstack-resource-agents/plain/ocf/manila-api
|
||||
# chmod a+rx *
|
||||
|
||||
You can now add the Pacemaker configuration for the Shared File Systems
|
||||
API resource. Connect to the Pacemaker cluster with the
|
||||
:command:`crm configure` command and add the following cluster resources:
|
||||
|
||||
::
|
||||
|
||||
  primitive p_manila-api ocf:openstack:manila-api \
    params config="/etc/manila/manila.conf" \
    os_password="secretsecret" \
    os_username="admin" \
    os_tenant_name="admin" \
    keystone_get_token_url="http://10.0.0.11:5000/v2.0/tokens" \
    op monitor interval="30s" timeout="30s"
|
||||
|
||||
This configuration creates ``p_manila-api``, a resource for managing the
|
||||
Shared File Systems API service.
|
||||
|
||||
The :command:`crm configure` command supports batch input, so you may copy and paste
|
||||
the lines above into your live Pacemaker configuration and then make changes
|
||||
as required. For example, you may enter ``edit p_ip_manila-api`` from the
|
||||
:command:`crm configure` menu and edit the resource to match your preferred
|
||||
virtual IP address.
|
||||
|
||||
Once completed, commit your configuration changes by entering :command:`commit`
|
||||
from the :command:`crm configure` menu. Pacemaker then starts the
|
||||
Shared File Systems API service and its dependent resources on one of your
|
||||
nodes.
|
||||
|
||||
.. _ha-manila-configure:
|
||||
|
||||
Configure Shared File Systems API service
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Edit the :file:`/etc/manila/manila.conf` file:
|
||||
|
||||
.. code-block:: ini
|
||||
:linenos:
|
||||
|
||||
# We have to use MySQL connection to store data:
|
||||
sql_connection = mysql+pymysql://manila:password@10.0.0.11/manila?charset=utf8
|
||||
|
||||
# We bind Shared File Systems API to the VIP:
|
||||
   osapi_share_listen = 10.0.0.11
|
||||
|
||||
# We send notifications to High Available RabbitMQ:
|
||||
notifier_strategy = rabbit
|
||||
rabbit_host = 10.0.0.11
|
||||
|
||||
|
||||
.. _ha-manila-services:
|
||||
|
||||
Configure OpenStack services to use HA Shared File Systems API
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Your OpenStack services must now point their Shared File Systems API
|
||||
configuration to the highly available, virtual cluster IP address rather than
|
||||
a Shared File Systems API server’s physical IP address as you would
|
||||
for a non-HA environment.
|
||||
|
||||
You must create the Shared File Systems API endpoint with this IP.
|
||||
|
||||
If you are using both private and public IP addresses, you should create two
|
||||
virtual IPs and define your endpoints like this:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ openstack endpoint create --region RegionOne \
|
||||
sharev2 public 'http://PUBLIC_VIP:8786/v2/%(tenant_id)s'
|
||||
|
||||
$ openstack endpoint create --region RegionOne \
|
||||
sharev2 internal 'http://10.0.0.11:8786/v2/%(tenant_id)s'
|
||||
|
||||
$ openstack endpoint create --region RegionOne \
|
||||
sharev2 admin 'http://10.0.0.11:8786/v2/%(tenant_id)s'
|
13
doc/ha-guide/source/storage-ha.rst
Normal file
13
doc/ha-guide/source/storage-ha.rst
Normal file
@ -0,0 +1,13 @@
|
||||
=========================================
|
||||
Configuring Storage for high availability
|
||||
=========================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
storage-ha-cinder.rst
|
||||
storage-ha-glance.rst
|
||||
storage-ha-manila.rst
|
||||
storage-ha-backend.rst
|
||||
|
||||
|
@ -9,8 +9,8 @@ if [[ $# > 0 ]] ; then
|
||||
fi
|
||||
fi
|
||||
|
||||
for guide in user-guide admin-guide \
|
||||
contributor-guide image-guide arch-design cli-reference; do
|
||||
for guide in admin-guide arch-design cli-reference contributor-guide \
|
||||
ha-guide image-guide user-guide; do
|
||||
tools/build-rst.sh doc/$guide --build build \
|
||||
--target $guide $LINKCHECK
|
||||
done
|
||||
|