Merge "[arch-design] Move RST guide to arch-design folder"

2015-11-27 06:32:44 +00:00 · 2015-11-27 06:32:44 +00:00 · b7d4544ff2
commit b7d4544ff2
parent 908c57adcc 9dead00ded
189 changed files with 9 additions and 19787 deletions
--- a/RELEASENOTES.rst
+++ b/RELEASENOTES.rst
@ -35,6 +35,11 @@ Virtual Machine Image Guide
 * RST conversion finished.
 Architecture Design Guide
 -------------------------
 * Completed RST conversion.
 Translations
 ------------
--- a/doc-tools-check-languages.conf
+++ b/doc-tools-check-languages.conf
@ -30,9 +30,9 @@ declare -A SPECIAL_BOOKS=(
    ["networking-guide"]="RST"
    ["user-guide"]="RST"
    ["user-guide-admin"]="RST"
    ["arch-design"]="RST"
    # Skip in-progress guides
    ["contributor-guide"]="skip"
    ["arch-design-rst"]="skip"
    ["config-ref-rst"]="skip"
    # This needs special handling, handle it with the RST tools.
    ["common-rst"]="RST"
--- a/doc/arch-design/bk-openstack-arch-design.xml
+++ b/doc/arch-design/bk-openstack-arch-design.xml
@ -1,64 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <book xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="openstack-arch-design">
    <title>OpenStack Architecture Design Guide</title>
    <?rax title.font.size="28px" subtitle.font.size="28px"?>
    <titleabbrev>Architecture Guide</titleabbrev>
    <info>
        <author>
            <personname>
                <firstname/>
                <surname/>
            </personname>
            <affiliation>
                <orgname>OpenStack Foundation</orgname>
            </affiliation>
        </author>
        <copyright>
           <year>2014</year>
           <year>2015</year>
            <holder>OpenStack Foundation</holder>
        </copyright>
        <releaseinfo>current</releaseinfo>
        <productname>OpenStack</productname>
        <pubdate/>
        <legalnotice role="apache2">
            <annotation>
                <remark>Copyright details are filled in by the
                    template.</remark>
            </annotation>
        </legalnotice>
        <legalnotice role="cc-by">
            <annotation>
                <remark>Remaining licensing details are filled in by
                    the template.</remark>
            </annotation>
        </legalnotice>
        <abstract>
            <para>To reap the benefits of OpenStack, you should
                plan, design, and architect your cloud properly,
                taking users' needs into account and understanding the
                use cases.</para>
        </abstract>
    </info>
    <!-- Chapters are referred from the book file through these
         include statements. You can add additional chapters using
         these types of statements. -->
    <xi:include href="../common/ch_preface.xml"/>
    <xi:include href="ch_introduction.xml"/>
    <xi:include href="ch_legal-security-requirements.xml"/>
    <xi:include href="ch_generalpurpose.xml"/>
    <xi:include href="ch_compute_focus.xml"/>
    <xi:include href="ch_storage_focus.xml"/>
    <xi:include href="ch_network_focus.xml"/>
    <xi:include href="ch_multi_site.xml"/>
    <xi:include href="ch_hybrid.xml"/>
    <xi:include href="ch_massively_scalable.xml"/>
    <xi:include href="ch_specialized.xml"/>
    <xi:include href="ch_references.xml"/>
    <xi:include href="../common/app_support.xml"/>
  <glossary role="auto"/>
 </book>
--- a/doc/arch-design/ch_compute_focus.xml
+++ b/doc/arch-design/ch_compute_focus.xml
@ -1,45 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="compute_focus">
  <title>Compute focused</title>
  <para>Compute-focused clouds are a specialized subset of the general purpose
    OpenStack cloud architecture. A compute-focused cloud specifically supports
    compute intensive workloads.</para>
    <note>
      <para>Compute intensive workloads may be CPU intensive, RAM intensive,
      or both; they are not typically storage or network intensive.</para>
    </note>
  <para>Compute-focused workloads may include the following use cases:</para>
    <itemizedlist>
      <listitem>
        <para>High performance computing (HPC)</para>
      </listitem>
      <listitem>
        <para>Big data analytics using Hadoop or other distributed data
          stores</para>
      </listitem>
      <listitem>
        <para>Continuous integration/continuous deployment (CI/CD)</para>
      </listitem>
      <listitem>
        <para>Platform-as-a-Service (PaaS)</para>
      </listitem>
      <listitem>
        <para>Signal processing for network function virtualization (NFV)</para>
      </listitem>
    </itemizedlist>
  <note>
    <para>A compute-focused OpenStack cloud does not typically use raw block storage
      services as it does not host applications that require
      persistent block storage.</para>
  </note>
    <xi:include href="compute_focus/section_tech_considerations_compute_focus.xml"/>
    <xi:include href="compute_focus/section_operational_considerations_compute_focus.xml"/>
    <xi:include href="compute_focus/section_architecture_compute_focus.xml"/>
    <xi:include href="compute_focus/section_prescriptive_examples_compute_focus.xml"/>
 </chapter>
--- a/doc/arch-design/ch_generalpurpose.xml
+++ b/doc/arch-design/ch_generalpurpose.xml
@ -1,95 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="generalpurpose">
    <title>General purpose</title>
    <para>OpenStack general purpose clouds are often considered a
        starting point for building a cloud deployment. They are designed
        to balance the components and do not emphasize any particular aspect
        of the overall computing environment.
        Cloud design must give equal weight to the compute, network, and
        storage components. General purpose clouds are
        found in private, public, and hybrid environments, lending
        themselves to many different use cases.
    </para>
      <note>
        <para>
          General purpose clouds are homogeneous deployments. They are
          not suited to specialized environments or edge case situations.
        </para>
      </note>
    <para>
        Common uses of a general purpose cloud include:
    </para>
      <itemizedlist>
        <listitem>
          <para>
          Providing a simple database
          </para>
        </listitem>
        <listitem>
          <para>
          A web application runtime environment
          </para>
        </listitem>
        <listitem>
          <para>
          A shared application development platform
          </para>
        </listitem>
        <listitem>
          <para>
          Lab test bed
          </para>
        </listitem>
      </itemizedlist>
    <para>Use cases that benefit from scale-out rather than scale-up approaches
        are good candidates for general purpose cloud architecture.
    </para>
    <para>A general purpose cloud is designed to have a range of potential
        uses or functions; it is not specialized for specific use cases. General
        purpose architecture is designed to address 80% of potential use
        cases available. The infrastructure, in itself, is a specific use case,
        enabling it to be used as a base model for the design process.
        General purpose clouds are designed to be platforms that are suited
        for general purpose applications.</para>
    <para>General purpose clouds are limited to the most basic
        components, but they can include additional resources such
        as:</para>
    <itemizedlist>
        <listitem>
            <para>Virtual-machine disk image library</para>
        </listitem>
        <listitem>
            <para>Raw block storage</para>
        </listitem>
        <listitem>
            <para>File or object storage</para>
        </listitem>
        <listitem>
            <para>Firewalls</para>
        </listitem>
        <listitem>
            <para>Load balancers</para>
        </listitem>
        <listitem>
            <para>IP addresses</para>
        </listitem>
        <listitem>
            <para>Network overlays or virtual local area networks
                (VLANs)</para>
        </listitem>
        <listitem>
            <para>Software bundles</para>
        </listitem>
    </itemizedlist>
    <xi:include href="generalpurpose/section_user_requirements_general_purpose.xml"/>
    <xi:include href="generalpurpose/section_tech_considerations_general_purpose.xml"/>
    <xi:include href="generalpurpose/section_operational_considerations_general_purpose.xml"/>
    <xi:include href="generalpurpose/section_architecture_general_purpose.xml"/>
    <xi:include href="generalpurpose/section_prescriptive_example_general_purpose.xml"/>
 </chapter>
--- a/doc/arch-design/ch_hybrid.xml
+++ b/doc/arch-design/ch_hybrid.xml
@ -1,59 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="hybrid">
    <title>Hybrid</title>
    <para>A <glossterm baseform="hybrid cloud">hybrid cloud</glossterm> design
        is one that uses more than one cloud. For example, designs that use
        both an OpenStack-based private cloud and an OpenStack-based public
        cloud, or that use an OpenStack cloud and a non-OpenStack cloud,
        are hybrid clouds.</para>
    <para><glossterm baseform="bursting">Bursting</glossterm> describes the
        practice of creating new instances in an external cloud to alleviate
        capacity issues in a private cloud.</para>
    <itemizedlist>
      <title>Example scenarios suited to hybrid clouds</title>
        <listitem>
            <para>Bursting from a private cloud to a public
                cloud</para>
        </listitem>
        <listitem>
            <para>Disaster recovery</para>
        </listitem>
        <listitem>
            <para>Development and testing</para>
        </listitem>
        <listitem>
            <para>Federated cloud, enabling users to choose resources
                from multiple providers</para>
        </listitem>
        <listitem>
            <para>Supporting legacy systems as they transition to the
                cloud</para>
        </listitem>
    </itemizedlist>
    <para>Hybrid clouds interact with systems that are outside
        the control of the private cloud administrator, and require careful
        architecture to prevent conflicts with hardware, software,
        and APIs under external control.</para>
    <para>The degree to which the architecture is OpenStack-based
        affects your ability to accomplish tasks with native
        OpenStack tools. By definition, this is a situation in which
        no single cloud can provide all of the necessary
        functionality. In order to manage the entire system, we recommend
        using a cloud management platform (CMP).</para>
    <para>There are several commercial and open source CMPs available,
        but there is no single CMP that can address all needs in all scenarios,
        and sometimes a manually-built solution is the best option.
        This chapter includes discussion of using CMPs for managing a hybrid
        cloud.</para>
    <xi:include href="hybrid/section_user_requirements_hybrid.xml"/>
    <xi:include href="hybrid/section_tech_considerations_hybrid.xml"/>
    <xi:include href="hybrid/section_operational_considerations_hybrid.xml"/>
    <xi:include href="hybrid/section_architecture_hybrid.xml"/>
    <xi:include href="hybrid/section_prescriptive_examples_hybrid.xml"/>
 </chapter>
--- a/doc/arch-design/ch_introduction.xml
+++ b/doc/arch-design/ch_introduction.xml
@ -1,18 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="introduction">
    <title>Introduction</title>
    <para><glossterm>OpenStack</glossterm> is a fully-featured, self-service
      cloud. This book takes you through some of the considerations you have to make
      when designing your cloud.</para>
  <xi:include href="introduction/section_intended_audience.xml"/>
  <xi:include href="introduction/section_how_this_book_is_organized.xml"/>
  <xi:include href="introduction/section_how_this_book_was_written.xml"/>
  <xi:include href="introduction/section_methodology.xml"/>
 </chapter>
--- a/doc/arch-design/ch_legal-security-requirements.xml
+++ b/doc/arch-design/ch_legal-security-requirements.xml
@ -1,260 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
         xmlns:xi="http://www.w3.org/2001/XInclude"
         xmlns:xlink="http://www.w3.org/1999/xlink"
         version="5.0"
         xml:id="security-legal-requirements">
  <?dbhtml stop-chunking?>
  <title>Security and legal requirements</title>
  <para>This chapter discusses the legal and security requirements you
    need to consider for the different OpenStack scenarios.</para>
  <section xml:id="legal-requirements">
    <title>Legal requirements</title>
    <para>Many jurisdictions have legislative and regulatory
    requirements governing the storage and management of data in
    cloud environments. Common areas of regulation include:</para>
    <itemizedlist>
      <listitem>
        <para>Data retention policies ensuring storage of
        persistent data and records management to meet data
        archival requirements.</para>
      </listitem>
      <listitem>
        <para>Data ownership policies governing the possession and
        responsibility for data.</para>
      </listitem>
      <listitem>
        <para>Data sovereignty policies governing the storage of
        data in foreign countries or otherwise separate
        jurisdictions.</para>
      </listitem>
      <listitem>
        <para>Data compliance policies governing certain types of
        information that must reside in certain locations due to
        regulatory issues and, more importantly, cannot reside in
        other locations for the same reason.</para>
      </listitem>
    </itemizedlist>
    <para>Examples of such legal frameworks include the <link
      xlink:href="http://ec.europa.eu/justice/data-protection/">data
      protection framework</link> of the European Union and the
    requirements of the <link
      xlink:href="http://www.finra.org/Industry/Regulation/FINRARules/">
      Financial Industry Regulatory Authority</link> in the United
    States. Consult a local regulatory body for more information.
    </para>
  </section>
  <section xml:id="security-overview">
     <title>Security</title>
     <para>When deploying OpenStack in an enterprise as a private
        cloud, despite activating a firewall and binding
        employees with security agreements, cloud architecture
        should not make assumptions about safety and protection.
        In addition to considering the users, operators, or administrators
        who will use the environment, consider also negative or hostile users who
        would attack or compromise the security of your deployment regardless
        of firewalls or security agreements.</para>
     <para>Attack vectors increase further in a public-facing OpenStack
        deployment. For example, the API endpoints and the
        software behind them become vulnerable to hostile
        entities attempting to gain unauthorized access or prevent access
        to services. This can result in loss of reputation and you must
        protect against it through auditing and appropriate
        filtering.</para>
     <para>It is important to understand that user authentication
        requests encase sensitive information such as user names,
        passwords, and authentication tokens. For this reason, place
        the API services behind hardware that performs SSL termination.</para>
          <warning>
            <para>Be mindful of consistency when utilizing third party
            clouds to explore authentication options.</para>
          </warning>
  </section>
  <section xml:id="security-domains">
     <title>Security domains</title>
     <para>A security domain comprises users, applications, servers or
           networks that share common trust requirements and expectations
           within a system. Typically, security domains have the same
           authentication and authorization requirements and users.</para>
     <para>You can map security domains individually to the
           installation, or combine them. For example, some
           deployment topologies combine both guest and data domains onto
           one physical network. In other cases these networks
           are physically separate. Map out the security domains against
           the needs of specific OpenStack topologies. The domains and their trust requirements
           depend on whether the cloud instance is public, private, or
           hybrid.</para>
   <simplesect>
     <title>Public security domains</title>
     <para>The public security domain is an untrusted area of
           the cloud infrastructure. It can refer to the internet as a
           whole or simply to networks over which the user has no
           authority. Always consider this domain untrusted. For example,
           in a hybrid cloud deployment, any information traversing
           between and beyond the clouds is in the public domain and
           untrustworthy.</para>
   </simplesect>
   <simplesect>
     <title>Guest security domains</title>
     <para>Typically used for compute instance-to-instance traffic, the
           guest security domain handles compute data generated by
           instances on the cloud but not services that support the
           operation of the cloud, such as API calls. Public cloud
           providers and private cloud providers who do not have
           stringent controls on instance use or who allow unrestricted
           internet access to instances should consider this domain to be
           untrusted. Private cloud providers may want to consider this
           network as internal and therefore trusted only if they have
           controls in place to assert that they trust instances and all
           their tenants.</para>
   </simplesect>
   <simplesect>
     <title>Management security domains</title>
     <para>The management security domain is where services interact.
           The networks in this domain transport confidential data such as configuration
           parameters, user names, and passwords. Trust this domain when it is
           behind an organization's firewall in deployments.</para>
   </simplesect>
   <simplesect>
     <title>Data security domains</title>
     <para>The data security domain is concerned primarily with
           information pertaining to the storage services within
           OpenStack. The data that crosses this network has integrity and
           confidentiality requirements. Depending on the type of deployment there
           may also be availability requirements. The trust level of this network
           is heavily dependent on deployment decisions and does not have a default
           level of trust.</para>
   </simplesect>
   </section>
   <section xml:id="hypervisor-security">
     <title>Hypervisor security</title>
      <para>The hypervisor also requires a security assessment. In a
            public cloud, organizations typically do not have control
            over the choice of hypervisor. Properly securing your
            hypervisor is important. An attack made upon an
            unsecured hypervisor is called a
            <firstterm>hypervisor breakout</firstterm>.
            Hypervisor breakout describes the event of a
            compromised or malicious instance breaking out of the resource
            controls of the hypervisor and gaining access to the bare
            metal operating system and hardware resources.</para>
      <para>This is not an issue if the security of instances is not important.
            However, enterprises need to avoid vulnerability. The only way to
            do this is to avoid the situation where the instances are running
            on a public cloud. That does not mean that there is a
            need to own all of the infrastructure on which an OpenStack
            installation operates; it suggests avoiding situations in which
            sharing hardware with others occurs.</para>
  </section>
  <section xml:id="security-baremetal">
      <title>Bare metal security</title>
      <para>There are other services worth considering that provide a
            bare metal instance instead of a cloud. In other cases, it is
            possible to replicate a second private cloud by integrating
            with a private Cloud-as-a-Service deployment. The
            organization does not buy the hardware, but also does not share
            with other tenants. It is also possible to use a provider that
            hosts a bare-metal public cloud instance for which the
            hardware is dedicated only to one customer, or a provider that
            offers private Cloud-as-a-Service.</para>
      <important>
        <para>Each cloud implements services differently.
            What keeps data secure in one
            cloud may not do the same in another. Be sure to know the
            security requirements of every cloud that handles the
            organization's data or workloads.</para>
      </important>
      <para>More information on OpenStack Security can be found in the
           <link xlink:href="http://docs.openstack.org/security-guide"><citetitle>OpenStack
           Security Guide</citetitle></link>.</para>
   </section>
   <section xml:id="networking-security">
       <title>Networking security</title>
       <para>Consider security implications and requirements before designing the
             physical and logical network topologies. Make sure that the networks are
             properly segregated and traffic flows are going to the correct
             destinations without crossing through locations that are undesirable.
             Consider the following example factors:</para>
        <itemizedlist>
          <listitem>
            <para>Firewalls</para>
          </listitem>
          <listitem>
            <para>Overlay interconnects for joining separated tenant networks</para>
          </listitem>
          <listitem>
            <para>Routing through or avoiding specific networks</para>
          </listitem>
        </itemizedlist>
       <para>How networks attach to hypervisors can expose security
             vulnerabilities. To mitigate the exploitation of hypervisor breakouts,
             separate networks from other systems and schedule instances for the
             network onto dedicated compute nodes. This prevents attackers
             from having access to the networks from a compromised instance.</para>
   </section>
   <section xml:id="security-multi-site">
       <title>Multi-site security</title>
       <para>Securing a multi-site OpenStack installation brings
             extra challenges. Tenants may expect a tenant-created network
             to be secure. In a multi-site installation the use of a
             non-private connection between sites may be required. This may
             mean that traffic would be visible to third parties and, in
             cases where an application requires security, this issue
             requires mitigation. In these instances, install a VPN or
             encrypted connection between sites to conceal sensitive traffic.</para>
       <para>Another security consideration with regard to multi-site
             deployments is Identity. Centralize authentication within a
             multi-site deployment. Centralization provides a
             single authentication point for users across the deployment,
             as well as a single point of administration for traditional
             create, read, update, and delete operations. Centralized
             authentication is also useful for auditing purposes because
             all authentication tokens originate from the same
             source.</para>
       <para>Just as tenants in a single-site deployment need isolation
             from each other, so do tenants in multi-site installations.
             The extra challenges in multi-site designs revolve around
             ensuring that tenant networks function across regions.
             OpenStack Networking (neutron) does not presently support
             a mechanism to provide this functionality, therefore an
             external system may be necessary to manage these mappings.
             Tenant networks may contain sensitive information requiring
             that this mapping be accurate and consistent to ensure that a
             tenant in one site does not connect to a different tenant in
             another site.</para>
   </section>
   <section xml:id="openstack-components-multi-site">
      <title>OpenStack components</title>
       <para>Most OpenStack installations require a bare minimum set of
             pieces to function. These include OpenStack Identity
             (keystone) for authentication, OpenStack Compute
             (nova) for compute, OpenStack Image service (glance) for image
             storage, OpenStack Networking (neutron) for networking, and
             potentially an object store in the form of OpenStack Object
             Storage (swift). Bringing multi-site into play also demands extra
             components in order to coordinate between regions. A centralized
             Identity service is necessary to provide the single authentication
             point. A centralized dashboard is also recommended to provide a
             single login point and a mapped experience to the API and CLI
             options available. If needed, use a centralized Object Storage service,
             installing the required swift proxy service alongside the Object
             Storage service.</para>
       <para>It may also be helpful to install a few extra options in
             order to facilitate certain use cases. For instance,
             installing DNS service may assist in automatically generating
             DNS domains for each region with an automatically-populated
             zone full of resource records for each instance. This
             facilitates using DNS as a mechanism for determining which
             region would be selected for certain applications.</para>
       <para>Another useful tool for managing a multi-site installation
             is Orchestration (heat). The Orchestration service
             allows the use of templates to define a set of instances to
             be launched together or for scaling existing sets. It can
             set up matching or differentiated groupings based on
             regions. For instance, if an application requires an equally
             balanced number of nodes across sites, the same heat template
             can be used to cover each site with small alterations to only
             the region name.</para>
   </section>
 </chapter>
--- a/doc/arch-design/ch_massively_scalable.xml
+++ b/doc/arch-design/ch_massively_scalable.xml
@ -1,79 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="massively_scalable">
    <title>Massively scalable</title>
    <para>A massively scalable architecture is a cloud
        implementation that is either a very large deployment, such as
        a commercial service provider might build, or
        one that has the capability to support user requests for large
        amounts of cloud resources.</para>
    <para>An example is an infrastructure in which requests to service
        500 or more instances at a time are common. A massively scalable
        infrastructure fulfills such a request without exhausting the
        available cloud infrastructure resources. While the high capital
        cost of implementing such a cloud architecture means that it
        is currently in limited use, many organizations are planning
        for massive scalability in the future.</para>
    <para>A massively scalable OpenStack cloud design presents a
        unique set of challenges and considerations. For the most part
        it is similar to a general purpose cloud architecture, as it
        is built to address a non-specific range of potential use
        cases or functions. It is rare that particular
        workloads determine the design or configuration of massively
        scalable clouds. The massively scalable cloud is most often
        built as a platform for a variety of workloads. Because private
        organizations rarely require or have the resources for them,
        massively scalable OpenStack clouds are generally built as
        commercial, public cloud offerings.</para>
    <para>Services provided by a massively scalable OpenStack cloud
        include:</para>
    <itemizedlist>
        <listitem>
            <para>Virtual-machine disk image library</para>
        </listitem>
        <listitem>
            <para>Raw block storage</para>
        </listitem>
        <listitem>
            <para>File or object storage</para>
        </listitem>
        <listitem>
            <para>Firewall functionality</para>
        </listitem>
        <listitem>
            <para>Load balancing functionality</para>
        </listitem>
        <listitem>
            <para>Private (non-routable) and public (floating) IP
                addresses</para>
        </listitem>
        <listitem>
            <para>Virtualized network topologies</para>
        </listitem>
        <listitem>
            <para>Software bundles</para>
        </listitem>
        <listitem>
            <para>Virtual compute resources</para>
        </listitem>
    </itemizedlist>
    <para>Like a general purpose cloud, the instances deployed in a
        massively scalable OpenStack cloud do not necessarily use
        any specific aspect of the cloud offering (compute, network,
        or storage). As the cloud grows in scale, the number of
        workloads can cause stress on all the cloud
        components. This adds further stresses to supporting
        infrastructure such as databases and message brokers. The
        architecture design for such a cloud must account for these
        performance pressures without negatively impacting user
        experience.</para>
    <xi:include href="massively_scalable/section_user_requirements_massively_scalable.xml"/>
    <xi:include href="massively_scalable/section_tech_considerations_massively_scalable.xml"/>
    <xi:include href="massively_scalable/section_operational_considerations_massively_scalable.xml"/>
 </chapter>
--- a/doc/arch-design/ch_multi_site.xml
+++ b/doc/arch-design/ch_multi_site.xml
@ -1,34 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="multi_site">
    <title>Multi-site</title>
    <para>OpenStack is capable of running in a multi-region
        configuration. This enables some parts of OpenStack to
        effectively manage a group of sites as a single cloud.</para>
    <para>Some use cases that might indicate a need for a multi-site
        deployment of OpenStack include:</para>
    <itemizedlist>
        <listitem>
            <para>An organization with a diverse geographic
                footprint.</para>
        </listitem>
        <listitem>
            <para>Geo-location sensitive data.</para>
        </listitem>
        <listitem>
            <para>Data locality, in which specific data or
                functionality should be close to users.</para>
        </listitem>
    </itemizedlist>
    <xi:include href="multi_site/section_user_requirements_multi_site.xml"/>
    <xi:include href="multi_site/section_tech_considerations_multi_site.xml"/>
    <xi:include href="multi_site/section_operational_considerations_multi_site.xml"/>
    <xi:include href="multi_site/section_architecture_multi_site.xml"/>
    <xi:include href="multi_site/section_prescriptive_examples_multi_site.xml"/>
 </chapter>
--- a/doc/arch-design/ch_network_focus.xml
+++ b/doc/arch-design/ch_network_focus.xml
@ -1,152 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="network_focus">
    <title>Network focused</title>
    <para>All OpenStack deployments depend on network communication in order
    to function properly due to their service-based nature. In some cases,
    however, the network is elevated beyond simple
    infrastructure. This chapter discusses architectures that are more
    reliant or focused on network services. These architectures depend
    on the network infrastructure and require
    network services that perform reliably in order to satisfy user and
    application requirements.</para>
    <para>Some possible use cases include:</para>
    <variablelist>
      <varlistentry>
        <term>Content delivery network</term>
        <listitem>
            <para>This includes streaming video, viewing photographs, or
            accessing any other cloud-based data repository distributed to
            a large number of end users. Network configuration affects
            latency, bandwidth, and the distribution of instances. Therefore,
            it impacts video streaming. Not all video streaming is
            consumer-focused. For example, multicast videos (used for media,
            press conferences, corporate presentations, and web conferencing
            services) can also use a content delivery network.
            The location of the video repository and its relationship to end
            users affects content delivery. Network throughput of the back-end
            systems, as well as the WAN architecture and the cache methodology,
            also affect performance.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Network management functions</term>
        <listitem>
            <para>Use this cloud to provide network service functions built to
            support the delivery of back-end network services such as DNS,
            NTP, or SNMP.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Network service offerings</term>
        <listitem>
            <para>Use this cloud to run customer-facing network tools to
            support services. Examples include VPNs, MPLS private networks,
            and GRE tunnels.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Web portals or web services</term>
        <listitem>
            <para>Web servers are a common application for cloud services,
            and we recommend an understanding of their network requirements.
            The network requires scaling out to meet user demand and deliver
            web pages with a minimum latency. Depending on the details of
            the portal architecture, consider the internal east-west and
            north-south network bandwidth.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>High speed and high volume transactional systems</term>
        <listitem>
            <para>
                These types of applications are sensitive to network
                configurations. Examples include financial systems,
                credit card transaction applications, and trading and other
                extremely high volume systems. These systems are sensitive
                to network jitter and latency. They must balance a high volume
                of East-West and North-South network traffic to
                maximize efficiency of the data delivery.
                Many of these systems must access large, high performance
                database back ends.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>High availability</term>
        <listitem>
            <para>These types of use cases are dependent on the proper sizing
            of the network to maintain replication of data between sites for
            high availability. If one site becomes unavailable, the extra
            sites can serve the displaced load until the original site
            returns to service. It is important to size network capacity
            to handle the desired loads.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Big data</term>
        <listitem>
            <para>Clouds used for the management and collection of big data
            (data ingest) have a significant demand on network resources.
            Big data often uses partial replicas of the data to maintain
            integrity over large distributed clouds. Other big data
            applications that require a large amount of network resources
            are Hadoop, Cassandra, NuoDB, Riak, and other NoSQL and
            distributed databases.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Virtual desktop infrastructure (VDI)</term>
        <listitem>
            <para>This use case is sensitive to network congestion, latency,
            jitter, and other network characteristics. Like video streaming,
            the user experience is important. However, unlike video
            streaming, caching is not an option to offset the network issues.
            VDI requires both upstream and downstream traffic and cannot rely
            on caching for the delivery of the application to the end user.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Voice over IP (VoIP)</term>
        <listitem>
            <para>This is sensitive to network congestion, latency, jitter,
            and other network characteristics. VoIP has a symmetrical traffic
            pattern and it requires network quality of service (QoS) for best
            performance. In addition, you can implement active queue management
            to deliver voice and multimedia content. Users are sensitive to
            latency and jitter fluctuations and can detect them at very low
            levels.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Video conference or web conference</term>
        <listitem>
            <para>This is sensitive to network congestion, latency, jitter,
            and other network characteristics. Video conferencing has a
            symmetrical traffic pattern, but unless the network is on an
            MPLS private network, it cannot use network quality of service
            (QoS) to improve performance. Similar to VoIP, users are
            sensitive to network performance issues even at low levels.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>High performance computing (HPC)</term>
        <listitem>
            <para>This is a complex use case that requires careful
            consideration of the traffic flows and usage patterns to address
            the needs of cloud clusters. It has high east-west traffic
            patterns for distributed computing, but there can be substantial
            north-south traffic depending on the specific application.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <xi:include href="network_focus/section_user_requirements_network_focus.xml"/>
    <xi:include href="network_focus/section_tech_considerations_network_focus.xml"/>
    <xi:include href="network_focus/section_operational_considerations_network_focus.xml"/>
    <xi:include href="network_focus/section_architecture_network_focus.xml"/>
    <xi:include href="network_focus/section_prescriptive_examples_network_focus.xml"/>
 </chapter>
--- a/doc/arch-design/ch_references.xml
+++ b/doc/arch-design/ch_references.xml
@ -1,128 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-design-references">
  <?dbhtml stop-chunking?>
  <title>References</title>
  <para>
    <link
        xlink:href="http://ec.europa.eu/justice/data-protection/">Data
    Protection framework of the European Union</link>: Guidance on
    Data Protection laws governed by the EU.
  </para>
  <para>
    <link
    xlink:href="http://www.internetsociety.org/deploy360/blog/2014/05/goodbye-ipv4-iana-starts-allocating-final-address-blocks/">Depletion
    of IPv4 Addresses</link>: Describes how IPv4 addresses are being
    depleted and why the migration to IPv6 is inevitable.
  </para>
  <para>
    <link
    xlink:href="http://www.garrettcom.com/techsupport/papers/ethernet_switch_reliability.pdf">Ethernet
    Switch Reliability</link>: Research white paper on Ethernet Switch
    reliability.
  </para>
  <para>
    <link
    xlink:href="http://www.finra.org/Industry/Regulation/FINRARules/">Financial
    Industry Regulatory Authority</link>: Requirements of the
    Financial Industry Regulatory Authority in the USA.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/cli-reference/content/chapter_cli-glance-property.html">Image
    Service property keys</link>: Glance API property keys allow the
    administrator to attach custom characteristics to images.
  </para>
  <para>
    <link xlink:href="http://libguestfs.org">LibGuestFS
    Documentation</link>: Official LibGuestFS documentation.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/openstack-ops/content/logging_monitoring.html">Logging
    and Monitoring</link>: Official OpenStack Operations
    documentation.
  </para>
  <para>
    <link xlink:href="http://manageiq.org/">ManageIQ Cloud Management
    Platform</link>: An Open Source Cloud Management Platform for
    managing multiple clouds.
  </para>
  <para>
    <link
    xlink:href="http://www.n-tron.com/pdf/network_availability.pdf">N-Tron
    Network Availability</link>: Research white paper on network
    availability.
  </para>
  <para>
    <link
    xlink:href="http://davejingtian.org/2014/03/30/nested-kvm-just-for-fun">Nested
    KVM</link>: Post on how to nest KVM under KVM.
  </para>
  <para>
    <link xlink:href="http://www.opencompute.org/">Open Compute
    Project</link>: The Open Compute Project Foundation's mission is
    to design and enable the delivery of the most efficient server,
    storage and data center hardware designs for scalable
    computing.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/openstack-ops/content/flavors.html">OpenStack
    Flavors</link>: Official OpenStack documentation.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/ha-guide/">OpenStack
    High Availability Guide</link>: Information on how to provide
    redundancy for the OpenStack components.
  </para>
  <para>
    <link
    xlink:href="https://wiki.openstack.org/wiki/HypervisorSupportMatrix">OpenStack
    Hypervisor Support Matrix</link>: Matrix of supported hypervisors
    and capabilities when used with OpenStack.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/developer/swift/replication_network.html">OpenStack
    Object Store (Swift) Replication Reference</link>: Developer
    documentation of Swift replication.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/openstack-ops/">OpenStack
    Operations Guide</link>: The OpenStack Operations Guide provides
    information on setting up and installing OpenStack.
  </para>
  <para>
    <link
    xlink:href="http://docs.openstack.org/security-guide/">OpenStack
    Security Guide</link>: The OpenStack Security Guide provides
    information on securing OpenStack deployments.
  </para>
  <para>
    <link
    xlink:href="http://www.openstack.org/marketplace/training">OpenStack
    Training Marketplace</link>: The OpenStack marketplace for training and
    vendors providing training on OpenStack.
  </para>
  <para>
    <link
    xlink:href="https://wiki.openstack.org/wiki/Pci_passthrough#How_to_check_PCI_status_with_PCI_api_paches">PCI
    passthrough</link>: The PCI API patches extend the
    servers/os-hypervisor to show PCI information for instances and
    compute nodes, and also provide a resource endpoint to show PCI
    information.
  </para>
  <para>
    <link
    xlink:href="https://wiki.openstack.org/wiki/TripleO">TripleO</link>:
    TripleO is a program aimed at installing, upgrading and operating
    OpenStack clouds using OpenStack's own cloud facilities as the
    foundation.
  </para>
 </chapter>
--- a/doc/arch-design/ch_specialized.xml
+++ b/doc/arch-design/ch_specialized.xml
@ -1,67 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="specialized">
    <title>Specialized cases</title>
    <para>Although most OpenStack architecture designs fall into one
        of the seven major scenarios outlined in other sections
        (compute focused, network focused, storage focused, general
        purpose, multi-site, hybrid cloud, and massively scalable),
        there are a few use cases that do not fit into these categories.
        This section discusses these specialized cases and provides
        some additional details and design considerations
        for each use case:</para>
    <itemizedlist>
        <listitem>
            <para>
              <link
              linkend="specialized-networking-example">Specialized
              networking</link>: describes running
              networking-oriented software that may involve reading
              packets directly from the wire or participating in
              routing protocols.
            </para>
        </listitem>
        <listitem>
            <para>
              <link
              linkend="software-defined-networking-sdn">Software-defined
              networking (SDN)</link>: describes both
              running an SDN controller from within OpenStack as well
              as participating in a software-defined network.
            </para>
        </listitem>
        <listitem>
            <para>
              <link
              linkend="desktop-as-a-service">Desktop-as-a-Service</link>:
              describes running a virtualized desktop environment
              in a cloud (<glossterm>Desktop-as-a-Service</glossterm>).
              This applies to private and public clouds.
            </para>
        </listitem>
        <listitem>
            <para>
              <link
              linkend="arch-guide-openstack-on-openstack">OpenStack on
              OpenStack</link>: describes building a multi-tiered cloud by
              running OpenStack on top of an OpenStack installation.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="specialized-hardware">Specialized
              hardware</link>: describes the use of specialized
              hardware devices from within the OpenStack environment.
            </para>
        </listitem>
    </itemizedlist>
    <xi:include href="specialized/section_multi_hypervisor_specialized.xml"/>
    <xi:include href="specialized/section_networking_specialized.xml"/>
    <xi:include href="specialized/section_software_defined_networking_specialized.xml"/>
    <xi:include href="specialized/section_desktop_as_a_service_specialized.xml"/>
    <xi:include href="specialized/section_openstack_on_openstack_specialized.xml"/>
    <xi:include href="specialized/section_hardware_specialized.xml"/>
 </chapter>
--- a/doc/arch-design/ch_storage_focus.xml
+++ b/doc/arch-design/ch_storage_focus.xml
@ -1,78 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <chapter xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="storage_focus">
    <title>Storage focused</title>
    <para>Cloud storage is a model of data storage that stores digital
        data in logical pools and physical storage that spans
        across multiple servers and locations. Cloud storage commonly
        refers to a hosted object storage service; however, the term
        also includes other types of data storage that are
        available as a service, for example block storage.</para>
    <para>Cloud storage runs on virtualized infrastructure and
        resembles broader cloud computing in terms of accessible
        interfaces, elasticity, scalability, multi-tenancy, and
        metered resources. You can use cloud storage services from
        an off-premises service or deploy on-premises.</para>
    <para>Cloud storage consists of many distributed, synonymous
        resources, which are often referred to as integrated
        storage clouds. Cloud storage is highly fault tolerant through
        redundancy and the distribution of data. It is highly durable
        through the creation of versioned copies, and can be
        consistent with regard to data replicas.</para>
    <para>At large scale, management of data operations is
        a resource intensive process for an organization. Hierarchical
        storage management (HSM) systems and data grids help
        annotate and report a baseline data valuation to make
        intelligent decisions and automate data decisions. HSM enables
        automated tiering and movement, as well as orchestration
        of data operations. A data grid is an architecture, or an evolving
        set of services and technologies, that brings together sets of
        services enabling users to manage large data sets.</para>
    <para>Example applications deployed with cloud
        storage characteristics:</para>
    <itemizedlist>
        <listitem>
            <para>Active archive, backups and hierarchical storage
                management.</para>
        </listitem>
        <listitem>
            <para>General content storage and synchronization. An
                example of this is private dropbox.</para>
        </listitem>
        <listitem>
            <para>Data analytics with parallel file systems.</para>
        </listitem>
        <listitem>
            <para>Unstructured data store for services. For example,
                social media back-end storage.</para>
        </listitem>
        <listitem>
            <para>Persistent block storage.</para>
        </listitem>
        <listitem>
            <para>Operating system and application image store.</para>
        </listitem>
        <listitem>
            <para>Media streaming.</para>
        </listitem>
        <listitem>
            <para>Databases.</para>
        </listitem>
        <listitem>
            <para>Content distribution.</para>
        </listitem>
        <listitem>
            <para>Cloud storage peering.</para>
        </listitem>
    </itemizedlist>
    <xi:include href="storage_focus/section_tech_considerations_storage_focus.xml"/>
    <xi:include href="storage_focus/section_operational_considerations_storage_focus.xml"/>
    <xi:include href="storage_focus/section_architecture_storage_focus.xml"/>
    <xi:include href="storage_focus/section_prescriptive_examples_storage_focus.xml"/>
 </chapter>
--- a/doc/arch-design/compute_focus/section_architecture_compute_focus.xml
+++ b/doc/arch-design/compute_focus/section_architecture_compute_focus.xml
@ -1,268 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-design-architecture-hardware">
  <?dbhtml stop-chunking?>
  <title>Architecture</title>
    <para>The hardware selection covers three areas:</para>
      <itemizedlist>
        <listitem>
          <para>Compute</para>
        </listitem>
        <listitem>
          <para>Network</para>
        </listitem>
        <listitem>
          <para>Storage</para>
        </listitem>
      </itemizedlist>
  <para>Compute-focused OpenStack clouds have high demands on processor and
    memory resources, and require hardware that can handle these demands.
    Consider the following factors when selecting compute (server) hardware:</para>
    <itemizedlist>
      <listitem>
        <para>Server density</para>
      </listitem>
      <listitem>
        <para>Resource capacity</para>
      </listitem>
      <listitem>
        <para>Expandability</para>
      </listitem>
      <listitem>
        <para>Cost</para>
      </listitem>
    </itemizedlist>
  <para>Weigh these considerations against each other to determine the
    best design for the desired purpose. For example, increasing server density
    means sacrificing resource capacity or expandability.</para>
  <para>A compute-focused cloud should have an emphasis on server hardware
    that can offer more CPU sockets, more CPU cores, and more RAM. Network
    connectivity and storage capacity are less critical.</para>
  <para>When designing a compute-focused OpenStack architecture, you must
    consider whether you intend to scale up or scale out.
    Selecting a smaller number of larger hosts, or a
    larger number of smaller hosts, depends on a combination of factors:
    cost, power, cooling, physical rack and floor space, support-warranty,
    and manageability.</para>
  <para>Considerations for selecting hardware:</para>
    <itemizedlist>
      <listitem>
        <para>Most blade servers can support dual-socket multi-core CPUs. To
          avoid this CPU limit, select <literal>full width</literal>
          or <literal>full height</literal> blades.
          Be aware, however, that this also decreases server density. For example,
          high density blade servers such as HP BladeSystem or Dell PowerEdge
          M1000e support up to 16 servers in only ten rack units. Using
          half-height blades is twice as dense as using full-height blades;
          full-height blades result in only eight servers per ten rack units.</para>
      </listitem>
      <listitem>
        <para>1U rack-mounted servers that occupy only a single rack
          unit may offer greater server density than a blade server
          solution. It is possible to place forty 1U servers in a rack, providing
          space for the top of rack (ToR) switches, compared to 32 full width
          blade servers.</para>
      </listitem>
      <listitem>
        <para>2U rack-mounted servers provide quad-socket, multi-core CPU
          support, but with a corresponding decrease in server density (half
          the density that 1U rack-mounted servers offer).</para>
      </listitem>
      <listitem>
        <para>Larger rack-mounted servers, such as 4U servers, often provide
          even greater CPU capacity, commonly supporting four or even eight CPU
          sockets. These servers have greater expandability, but such servers
          have much lower server density and are often more expensive.</para>
      </listitem>
      <listitem>
        <para><literal>Sled servers</literal> are rack-mounted servers that
          support multiple
          independent servers in a single 2U or 3U enclosure. These deliver higher
          density as compared to typical 1U or 2U rack-mounted servers. For
          example, many sled servers offer four independent dual-socket
          nodes in 2U for a total of eight CPU sockets in 2U.</para>
      </listitem>
    </itemizedlist>
  <para>Consider these when choosing server hardware for a
    compute-focused OpenStack design architecture:</para>
    <itemizedlist>
      <listitem>
        <para>Instance density</para>
      </listitem>
      <listitem>
        <para>Host density</para>
      </listitem>
      <listitem>
        <para>Power and cooling density</para>
      </listitem>
    </itemizedlist>
    <section xml:id="selecting-networking-hardware-arch">
      <title>Selecting networking hardware</title>
        <para>Some of the key considerations for networking hardware selection
          include:</para>
          <itemizedlist>
            <listitem>
              <para>Port count</para>
            </listitem>
            <listitem>
              <para>Port density</para>
            </listitem>
            <listitem>
              <para>Port speed</para>
            </listitem>
            <listitem>
              <para>Redundancy</para>
            </listitem>
            <listitem>
              <para>Power requirements</para>
            </listitem>
          </itemizedlist>
        <para>We recommend designing the network architecture using
          a scalable network model that makes it easy to add capacity and
          bandwidth. A good example of such a model is the leaf-spine model. In
          this type of network design, it is possible to easily add additional
          bandwidth as well as scale out to additional racks of gear. It is
          important to select network hardware that supports the required
          port count, port speed, and port density while also allowing for future
          growth as workload demands increase. It is also important to evaluate
          where in the network architecture it is valuable to provide redundancy.</para>
    </section>
    <section xml:id="os-and-hypervisor-arch">
      <title>Operating system and hypervisor</title>
        <para>The selection of operating system (OS) and hypervisor has a
          significant impact on the end point design.</para>
        <para>OS and hypervisor selection impact the following areas:</para>
          <itemizedlist>
            <listitem>
              <para>Cost</para>
            </listitem>
            <listitem>
              <para>Supportability</para>
            </listitem>
            <listitem>
              <para>Management tools</para>
            </listitem>
            <listitem>
              <para>Scale and performance</para>
            </listitem>
            <listitem>
              <para>Security</para>
            </listitem>
            <listitem>
              <para>Supported features</para>
            </listitem>
            <listitem>
              <para>Interoperability</para>
            </listitem>
          </itemizedlist>
    </section>
    <section xml:id="openstack-components-arch">
      <title>OpenStack components</title>
        <para>The selection of OpenStack components is important.
          There are certain components that are required, for example the compute
          and image services, but others, such as the Orchestration service, may not
          be present.</para>
        <para>For a compute-focused OpenStack design architecture, the
          following components may be present:</para>
          <itemizedlist>
            <listitem>
              <para>Identity (keystone)</para>
            </listitem>
            <listitem>
              <para>Dashboard (horizon)</para>
            </listitem>
            <listitem>
              <para>Compute (nova)</para>
            </listitem>
            <listitem>
              <para>Object Storage (swift)</para>
            </listitem>
            <listitem>
              <para>Image (glance)</para>
            </listitem>
            <listitem>
              <para>Networking (neutron)</para>
            </listitem>
            <listitem>
              <para>Orchestration (heat)</para>
            </listitem>
          </itemizedlist>
        <note>
          <para>A compute-focused design is less likely to include OpenStack Block
            Storage. However, there may be some situations where the need for
            performance requires a block storage component to improve data I-O.</para>
        </note>
        <para>The exclusion of certain OpenStack components might also limit the
          functionality of other components. If a design includes
          the Orchestration service but excludes the Telemetry service, then
          the design cannot take advantage of Orchestration's auto
          scaling functionality as this relies on information from Telemetry.</para>
    </section>
    <section xml:id="networking-software-arch">
        <title>Networking software</title>
          <para>OpenStack Networking provides a wide variety of networking services
            for instances. There are many additional networking software packages
            that might be useful to manage the OpenStack components themselves.
            The <citetitle>OpenStack High Availability Guide</citetitle>
            (<link xlink:href="http://docs.openstack.org/ha-guide/">http://docs.openstack.org/ha-guide/</link>)
            describes some of these software packages in more detail.
          </para>
          <para>For a compute-focused OpenStack cloud, the OpenStack infrastructure
            components must be highly available. If the design does not
            include hardware load balancing, you must add networking software packages,
            for example, HAProxy.</para>
    </section>
    <section xml:id="management-software-arch">
        <title>Management software</title>
          <para>The selected supplemental software solution impacts and affects
            the overall OpenStack cloud design. This includes software for
            providing clustering, logging, monitoring and alerting.</para>
          <para>The availability requirements of the design are the main determiner
            for the inclusion of clustering software, such as Corosync or Pacemaker.</para>
          <para>Operational considerations determine the requirements for logging,
            monitoring, and alerting. Each of these sub-categories includes
            various options.</para>
          <para>Some other potential design impacts include:</para>
            <variablelist>
              <varlistentry>
                <term>OS-hypervisor combination</term>
              <listitem>
               <para>Ensure that the selected logging,
                 monitoring, or alerting tools support the proposed OS-hypervisor
                 combination.</para>
              </listitem>
              </varlistentry>
              <varlistentry>
                <term>Network hardware</term>
              <listitem>
                <para>The logging, monitoring, and alerting software
                  must support the network hardware selection.</para>
              </listitem>
              </varlistentry>
            </variablelist>
    </section>
    <section xml:id="database-software-arch">
        <title>Database software</title>
          <para>A large majority of OpenStack components require access to
            back-end database services to store state and configuration
            information. Select an appropriate back-end database that
            satisfies the availability and fault tolerance requirements of the
            OpenStack services. OpenStack services support connecting
            to any database that the SQLAlchemy Python drivers support;
            however, most common database deployments make use of MySQL or some
            variation of it. We recommend that you make the database that provides
            back-end services within a compute-focused cloud highly
            available. Some of the more common software solutions include Galera,
            MariaDB, and MySQL with multi-master replication.</para>
    </section>
 </section>
--- a/doc/arch-design/compute_focus/section_operational_considerations_compute_focus.xml
+++ b/doc/arch-design/compute_focus/section_operational_considerations_compute_focus.xml
@ -1,84 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="operational-considerations-compute-focus">
  <?dbhtml stop-chunking?>
  <title>Operational considerations</title>
  <para>There are a number of operational considerations that affect the
    design of compute-focused OpenStack clouds, including:</para>
    <itemizedlist>
      <listitem>
        <para>
          Enforcing strict API availability requirements
        </para>
      </listitem>
      <listitem>
        <para>
          Understanding and dealing with failure scenarios
        </para>
      </listitem>
      <listitem>
        <para>
          Managing host maintenance schedules
        </para>
      </listitem>
    </itemizedlist>
  <para>Service-level agreements (SLAs) are contractual obligations that
    ensure the availability of a service. When designing an OpenStack cloud,
    factoring in promises of availability implies a certain level of
    redundancy and resiliency.</para>
  <section xml:id="montioring-compute-focus">
    <title>Monitoring</title>
    <para>OpenStack clouds require appropriate monitoring platforms
      to catch and manage errors.</para>
      <note>
        <para>We recommend leveraging existing monitoring systems
          to see if they are able to effectively monitor an
          OpenStack environment.</para>
      </note>
    <para>Specific meters that are critically important to capture
      include:</para>
      <itemizedlist>
        <listitem>
          <para>Image disk utilization</para>
        </listitem>
        <listitem>
          <para>Response time to the Compute API</para>
        </listitem>
      </itemizedlist>
  </section>
  <section xml:id="capacity-planning-operational">
    <title>Capacity planning</title>
    <para>Adding extra capacity to an OpenStack cloud is a
      horizontally scaling process.</para>
    <para>We recommend similar (or the same) CPUs
      when adding extra nodes to the environment. This reduces
      the chance of breaking live-migration features if they are
      present. Scaling out hypervisor hosts also has a direct effect
      on network and other data center resources. We recommend you
      factor in this increase when reaching rack capacity or when requiring
      extra network switches.</para>
    <para>Changing the internal components of a Compute host to account for
      increases in demand is a process known as vertical scaling.
      Swapping a CPU for one with more cores, or
      increasing the memory in a server, can help add extra
      capacity for running applications.</para>
    <para>Another option is to assess the average workloads and
      increase the number of instances that can run within the
      compute environment by adjusting the overcommit ratio.</para>
      <note>
        <para>It is important to remember that changing the CPU
          overcommit ratio can have a detrimental effect and cause
          a potential increase in noisy neighbor issues.</para>
      </note>
    <para>The added risk of increasing the overcommit ratio is that
      more instances fail when a compute host fails. We do not recommend
      that you increase the CPU overcommit ratio in compute-focused
      OpenStack design architecture, as it can increase the potential
      for noisy neighbor issues.</para>
  </section>
 </section>
--- a/doc/arch-design/compute_focus/section_prescriptive_examples_compute_focus.xml
+++ b/doc/arch-design/compute_focus/section_prescriptive_examples_compute_focus.xml
@ -1,162 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="prescriptive-example-compute-focus">
    <?dbhtml stop-chunking?>
    <title>Prescriptive examples</title>
    <para>The Conseil Européen pour la Recherche Nucléaire (CERN),
        also known as the European Organization for Nuclear Research,
        provides particle accelerators and other infrastructure for
        high-energy physics research.</para>
    <para>As of 2011, CERN operated these two compute centers in Europe
        with plans to add a third.</para>
    <informaltable rules="all">
        <col width="40%" />
        <col width="60%" />
        <thead>
            <tr><th>Data center</th><th>Approximate capacity</th></tr>
            </thead>
        <tbody>
            <tr>
                <td>Geneva, Switzerland</td>
                <td>
                    <itemizedlist>
                        <listitem><para>3.5 Mega Watts</para></listitem>
                        <listitem><para>91000 cores</para></listitem>
                        <listitem><para>120 PB HDD</para></listitem>
                        <listitem><para>100 PB Tape</para></listitem>
                        <listitem><para>310 TB Memory</para></listitem>
                    </itemizedlist>
                </td>
            </tr>
            <tr>
                <td>Budapest, Hungary</td>
                <td>
                    <itemizedlist>
                        <listitem><para>2.5 Mega Watts</para></listitem>
                        <listitem><para>20000 cores</para></listitem>
                        <listitem><para>6 PB HDD</para></listitem>
                    </itemizedlist>
                </td>
            </tr>
        </tbody>
    </informaltable>
    <para>To support a growing number of compute-heavy users of
        experiments related to the Large Hadron Collider (LHC), CERN
        ultimately elected to deploy an OpenStack cloud using
        Scientific Linux and RDO. This effort aimed to simplify the
        management of the center's compute resources with a view to
        doubling compute capacity through the addition of a
        data center in 2013 while maintaining the same
        levels of compute staff.</para>
    <para>The CERN solution uses <glossterm baseform="cell">cells</glossterm>
        for segregation of compute
        resources and for transparently scaling between different data
        centers. This decision meant trading off support for security
        groups and live migration. In addition, they must manually replicate
        some details, like flavors, across cells. In
        spite of these drawbacks, cells provide the
        required scale while exposing a single public API endpoint to
        users.</para>
    <para>CERN created a compute cell for each of the two original data
        centers and created a third when it added a new data center
        in 2013. Each cell contains three availability zones to
        further segregate compute resources and at least three
        RabbitMQ message brokers configured for clustering with
        mirrored queues for high availability.</para>
    <para>The API cell, which resides behind a HAProxy load balancer,
        is in the data center in Switzerland and directs API
        calls to compute cells using a customized variation of the
        cell scheduler. The customizations allow certain workloads to
        route to a specific data center or all data centers,
        with cell RAM availability determining cell selection in the
        latter case.</para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in" fileref="../figures/Generic_CERN_Example.png"/>
        </imageobject>
    </mediaobject>
    <para>There is also some customization of the filter scheduler
        that handles placement within the cells:</para>
    <variablelist>
      <varlistentry><term>ImagePropertiesFilter</term>
        <listitem>
          <para>Provides special handling
                depending on the guest operating system in use
                (Linux-based or Windows-based).</para>
        </listitem>
      </varlistentry>
      <varlistentry><term>ProjectsToAggregateFilter</term>
        <listitem><para>Provides special
                handling depending on which project the instance is
                associated with.</para>
        </listitem>
      </varlistentry>
      <varlistentry><term>default_schedule_zones</term>
        <listitem><para>Allows the selection of
                multiple default availability zones, rather than a
                single default.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>A central database team manages the MySQL database server in each cell
        in an active/passive configuration with a NetApp storage back end.
        Backups run every 6 hours.</para>
    <section xml:id="network-architecture">
      <title>Network architecture</title>
    <para>To integrate with existing networking infrastructure, CERN
        made customizations to legacy networking (nova-network). This was in the
        form of a driver to integrate with CERN's existing database
        for tracking MAC and IP address assignments.</para>
    <para>The driver facilitates selection of a MAC address and IP for
        new instances based on the compute node where the scheduler places
        the instance.</para>
    <para>The driver considers the compute node where the scheduler
        placed an instance and selects a MAC address and IP
        from the pre-registered list associated with that node in the
        database. The database updates to reflect the address assignment to
        that instance.</para>
    </section>
    <section xml:id="storage-architecture">
      <title>Storage architecture</title>
    <para>CERN deploys the OpenStack Image service in the API cell and
        configures it to expose version 1 (V1) of the API. This also requires
        the image registry. The storage back end in
        use is a 3 PB Ceph cluster.</para>
    <para>CERN maintains a small set of Scientific Linux 5 and 6 images onto
        which orchestration tools can place applications. Puppet manages
        instance configuration and customization.</para>
    </section>
    <section xml:id="monitoring">
      <title>Monitoring</title>
    <para>CERN does not require direct billing, but uses the Telemetry service
        to perform metering for the purposes of adjusting
        project quotas. CERN uses a sharded, replicated MongoDB back end.
        To spread API load, CERN deploys instances of the nova-api service
        within the child cells for Telemetry to query
        against. This also requires the configuration of supporting services
        such as keystone, glance-api, and glance-registry in the child cells.
    </para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Generic_CERN_Architecture.png"/>
        </imageobject>
    </mediaobject>
    <para>
        Additional monitoring tools in use include <link
        xlink:href="http://flume.apache.org/">Flume</link>, <link
        xlink:href="http://www.elasticsearch.org/">Elasticsearch</link>, <link
        xlink:href="http://www.elasticsearch.org/overview/kibana/">Kibana</link>,
        and the CERN-developed <link
        xlink:href="http://lemon.web.cern.ch/lemon/index.shtml">Lemon</link>
        project.
    </para>
    </section>
 </section>
--- a/doc/arch-design/compute_focus/section_tech_considerations_compute_focus.xml
+++ b/doc/arch-design/compute_focus/section_tech_considerations_compute_focus.xml
@ -1,275 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="technical-considerations-compute-focus">
    <?dbhtml stop-chunking?>
    <title>Technical considerations</title>
    <para>In a compute-focused OpenStack cloud, the type of instance
        workloads you provision heavily influences technical
        decision making.</para>
    <para>Public and private clouds require deterministic capacity
        planning to support elastic growth in order to meet user SLA
        expectations. Deterministic capacity planning is the path to
        predicting the effort and expense of making a given process
        perform consistently. This process is important because,
        when a service becomes a critical part of a user's
        infrastructure, the user's experience links directly to the SLAs of
        the cloud itself.</para>
    <para>There are two aspects of capacity planning to consider:</para>
    <itemizedlist>
      <listitem>
        <para>Planning the initial deployment footprint</para>
      </listitem>
      <listitem>
        <para>Planning expansion of the environment to stay ahead of the
        demands of cloud users</para>
      </listitem>
    </itemizedlist>
    <para>Begin planning an initial OpenStack deployment footprint with
      estimations of expected uptake, and existing infrastructure workloads.</para>
    <para>The starting point is the core count of the cloud. By
        applying relevant ratios, the user can gather information
        about:</para>
    <itemizedlist>
        <listitem>
            <para>The number of expected concurrent instances:
            (overcommit fraction × cores) / virtual cores per instance</para>
        </listitem>
        <listitem>
            <para>Required storage: flavor disk size × number of instances</para>
        </listitem>
    </itemizedlist>
    <para>These ratios determine the amount of
        additional infrastructure needed to support the cloud. For
        example, consider a situation in which you require 1600
        instances, each with 2 vCPU and 50 GB of storage. Assuming the
        default overcommit rate of 16:1, working out the math provides
        an equation of:</para>
    <itemizedlist>
        <listitem>
            <para>1600 = (16 &times; (number of physical cores)) / 2</para>
        </listitem>
        <listitem>
            <para>Storage required = 50&nbsp;GB &times; 1600</para>
        </listitem>
    </itemizedlist>
    <para>On the surface, the equations reveal the need for 200
        physical cores and 80&nbsp;TB of storage for
        <filename>/var/lib/nova/instances/</filename>. However,
        it is also important to
        look at patterns of usage to estimate the load that the API
        services, database servers, and queue servers are likely to
        encounter.</para>
    <para>Aside from the creation and termination of instances, consider the
        impact of users accessing the service,
        particularly on nova-api and its associated database. Listing
        instances gathers a great deal of information and given the
        frequency with which users run this operation, a cloud with a
        large number of users can increase the load significantly.
        This can even occur unintentionally. For example, the
        OpenStack Dashboard instances tab refreshes the list of
        instances every 30 seconds, so leaving it open in a browser
        window can cause unexpected load.</para>
    <para>Consideration of these factors can help determine how many
        cloud controller cores you require. A server with 8 CPU cores
        and 8 GB of RAM would be sufficient for a rack of
        compute nodes, given the above caveats.</para>
    <para>Key hardware specifications are also crucial to the
        performance of user instances. Be sure to consider budget and
        performance needs, including storage performance
        (spindles/core), memory availability (RAM/core), network
        bandwidth (Gbps/core), and overall CPU performance
        (CPU/core).</para>
    <para>The cloud resource calculator is a useful tool in examining
        the impacts of different hardware and instance load outs. See:
        <link xlink:href="https://github.com/noslzzp/cloud-resource-calculator/blob/master/cloud-resource-calculator.ods">https://github.com/noslzzp/cloud-resource-calculator/blob/master/cloud-resource-calculator.ods</link>
    </para>
    <section xml:id="expansion-planning-compute-focus">
        <title>Expansion planning</title>
    <para>A key challenge for planning the expansion of cloud
        compute services is the elastic nature of cloud infrastructure
        demands.</para>
    <para>Planning for expansion is a balancing act.
        Planning too conservatively can lead to unexpected
        oversubscription of the cloud and dissatisfied users. Planning
        for cloud expansion too aggressively can lead to unexpected
        underutilization of the cloud and funds spent unnecessarily on operating
        infrastructure.</para>
    <para>The key is to carefully monitor the trends in
        cloud usage over time. The intent is to measure the
        consistency with which you deliver services, not the
        average speed or capacity of the cloud. Using this information
        to model capacity performance enables users to more
        accurately determine the current and future capacity of the
        cloud.</para>
    </section>
    <section xml:id="cpu-and-ram-compute-focus">
      <title>CPU and RAM</title>
    <para>OpenStack enables users to overcommit CPU and RAM on
        compute nodes. This allows an increase in the number of
        instances running on the cloud at the cost of reducing the
        performance of the instances. OpenStack Compute uses the
        following ratios by default:</para>
    <itemizedlist>
        <listitem>
            <para>CPU allocation ratio: 16:1</para>
        </listitem>
        <listitem>
            <para>RAM allocation ratio: 1.5:1</para>
        </listitem>
    </itemizedlist>
    <para>The default CPU allocation ratio of 16:1 means that the
        scheduler allocates up to 16 virtual cores per physical core.
        For example, if a physical node has 12 cores, the scheduler
        sees 192 available virtual cores. With typical flavor
        definitions of 4 virtual cores per instance, this ratio would
        provide 48 instances on a physical node.</para>
    <para>Similarly, the default RAM allocation ratio of 1.5:1 means
        that the scheduler allocates instances to a physical node as
        long as the total amount of RAM associated with the instances
        is less than 1.5 times the amount of RAM available on the
        physical node.</para>
    <para>You must select the appropriate CPU and RAM allocation ratio
        based on particular use cases.</para>
    </section>
    <section xml:id="additional-hardware-compute-focus">
      <title>Additional hardware</title>
    <para>Certain use cases may benefit from exposure to additional
        devices on the compute node. Examples might include:</para>
    <itemizedlist>
        <listitem>
            <para>High performance computing jobs that benefit from
                the availability of graphics processing units (GPUs)
                for general-purpose computing.</para>
        </listitem>
        <listitem>
            <para>Cryptographic routines that benefit from the
                availability of hardware random number generators to
                avoid entropy starvation.</para>
        </listitem>
        <listitem>
            <para>Database management systems that benefit from the
                availability of SSDs for ephemeral storage to minimize
                read/write time.</para>
        </listitem>
    </itemizedlist>
    <para>Host aggregates group hosts that share similar
        characteristics, which can include hardware similarities. The
        addition of specialized hardware to a cloud deployment is
        likely to add to the cost of each node, so consider carefully
        whether all compute nodes, or
        just a subset targeted by flavors, need the
        additional customization to support the desired
        workloads.</para>
    </section>
    <section xml:id="utilization">
      <title>Utilization</title>
    <para>Infrastructure-as-a-Service offerings, including OpenStack,
        use flavors to provide standardized views of virtual machine
        resource requirements that simplify the problem of scheduling
        instances while making the best use of the available physical
        resources.</para>
    <para>In order to facilitate packing of virtual machines onto
        physical hosts, the default selection of flavors provides a
        second largest flavor that is half the size
        of the largest flavor in every dimension. It has half the
        vCPUs, half the vRAM, and half the ephemeral disk space. The
        next largest flavor is half that size again. The following figure
        provides a visual representation of this concept for a general
        purpose computing design:
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Compute_Tech_Bin_Packing_General1.png"
            />
        </imageobject>
    </mediaobject></para>
    <para>The following figure displays a CPU-optimized, packed server:
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Compute_Tech_Bin_Packing_CPU_optimized1.png"
            />
        </imageobject>
    </mediaobject></para>
    <para>These default flavors are well suited to typical configurations
        of commodity server hardware. To maximize utilization,
        however, it may be necessary to customize the flavors or
        create new ones in order to better align instance sizes to the
        available hardware.</para>
    <para>Workload characteristics may also influence hardware choices
        and flavor configuration, particularly where they present
        different ratios of CPU versus RAM versus HDD
        requirements.</para>
    <para>For more information on flavors, see:
        <link xlink:href="http://docs.openstack.org/openstack-ops/content/flavors.html">OpenStack Operations Guide: Flavors</link></para>
    </section>
    <section xml:id="openstack-components-compute-focus">
      <title>OpenStack components</title>
    <para>Due to the nature of the workloads in this
        scenario, a number of components are highly beneficial for
        a compute-focused cloud. These include the typical OpenStack
        components:</para>
    <itemizedlist>
        <listitem>
            <para>OpenStack Compute (nova)</para>
        </listitem>
        <listitem>
            <para>OpenStack Image service (glance)</para>
        </listitem>
        <listitem>
            <para>OpenStack Identity (keystone)</para>
        </listitem>
    </itemizedlist>
    <para>Also consider several specialized components:</para>
    <itemizedlist>
        <listitem>
            <para><glossterm>Orchestration</glossterm> (heat)</para>
                  <para>Given the nature of the
                    applications involved in this scenario, these are heavily
                    automated deployments. Making use of Orchestration is highly
                    beneficial in this case. You can script the deployment of a
                    batch of instances and the running of tests, but it
                    makes sense to use the Orchestration service
                    to handle all these actions.</para>
        </listitem>
        <listitem>
            <para>Telemetry (ceilometer)</para>
                <para>Telemetry and the alarms it generates support autoscaling
                  of instances using Orchestration. Users that are not using the
                  Orchestration service do not need to deploy the Telemetry
                  service and may choose to use external solutions to fulfill
                  their metering and monitoring requirements.</para>
        </listitem>
        <listitem>
            <para>OpenStack Block Storage (cinder)</para>
                <para>Due to the burstable nature of the workloads and the
                  applications and instances that perform batch
                  processing, this cloud mainly uses memory or CPU, so
                  the need for add-on storage to each instance is not a likely
                  requirement. This does not mean that you do not use
                  OpenStack Block Storage (cinder) in the infrastructure, but
                  typically it is not a central component.</para>
        </listitem>
        <listitem>
            <para>Networking</para>
                <para>When choosing a networking platform, ensure that it either
                  works with all desired hypervisor and container technologies
                  and their OpenStack drivers, or that it includes an implementation of
                  an ML2 mechanism driver. You can mix networking platforms
                  that provide ML2 mechanism drivers.</para>
        </listitem>
    </itemizedlist>
  </section>
 </section>
--- a/doc/arch-design/figures/Compute_NSX.png
+++ b/doc/arch-design/figures/Compute_NSX.png
--- a/doc/arch-design/figures/Compute_Tech_Bin_Packing_CPU_optimized1.png
+++ b/doc/arch-design/figures/Compute_Tech_Bin_Packing_CPU_optimized1.png
--- a/doc/arch-design/figures/Compute_Tech_Bin_Packing_General1.png
+++ b/doc/arch-design/figures/Compute_Tech_Bin_Packing_General1.png
--- a/doc/arch-design/figures/Example_Compute_Heavy_Multi-Hypervisor_-_Architecture_4.png
+++ b/doc/arch-design/figures/Example_Compute_Heavy_Multi-Hypervisor_-_Architecture_4.png
--- a/doc/arch-design/figures/Example_General_Purpose_Architecture_w_Swift.png
+++ b/doc/arch-design/figures/Example_General_Purpose_Architecture_w_Swift.png
--- a/doc/arch-design/figures/General_Architecture1.png
+++ b/doc/arch-design/figures/General_Architecture1.png
--- a/doc/arch-design/figures/General_Architecture2.png
+++ b/doc/arch-design/figures/General_Architecture2.png
--- a/doc/arch-design/figures/General_Architecture3.png
+++ b/doc/arch-design/figures/General_Architecture3.png
--- a/doc/arch-design/figures/Generic_CERN_Architecture.png
+++ b/doc/arch-design/figures/Generic_CERN_Architecture.png
--- a/doc/arch-design/figures/Generic_CERN_Example.png
+++ b/doc/arch-design/figures/Generic_CERN_Example.png
--- a/doc/arch-design/figures/Massively_Scalable_Cells_+_regions_+_azs.png
+++ b/doc/arch-design/figures/Massively_Scalable_Cells_+_regions_+_azs.png
--- a/doc/arch-design/figures/Methodology.png
+++ b/doc/arch-design/figures/Methodology.png
--- a/doc/arch-design/figures/Multi-Cloud_DR2.png
+++ b/doc/arch-design/figures/Multi-Cloud_DR2.png
--- a/doc/arch-design/figures/Multi-Cloud_Priv-AWS3.png
+++ b/doc/arch-design/figures/Multi-Cloud_Priv-AWS3.png
--- a/doc/arch-design/figures/Multi-Cloud_Priv-AWS4.png
+++ b/doc/arch-design/figures/Multi-Cloud_Priv-AWS4.png
--- a/doc/arch-design/figures/Multi-Cloud_Priv-Pub2.png
+++ b/doc/arch-design/figures/Multi-Cloud_Priv-Pub2.png
--- a/doc/arch-design/figures/Multi-Cloud_Priv-Pub3.png
+++ b/doc/arch-design/figures/Multi-Cloud_Priv-Pub3.png
--- a/doc/arch-design/figures/Multi-Cloud_failover.png
+++ b/doc/arch-design/figures/Multi-Cloud_failover.png
--- a/doc/arch-design/figures/Multi-Cloud_failover2.png
+++ b/doc/arch-design/figures/Multi-Cloud_failover2.png
--- a/doc/arch-design/figures/Multi-Site_Customer_Edge.png
+++ b/doc/arch-design/figures/Multi-Site_Customer_Edge.png
--- a/doc/arch-design/figures/Multi-Site_Location_Local.png
+++ b/doc/arch-design/figures/Multi-Site_Location_Local.png
--- a/doc/arch-design/figures/Multi-Site_shared_keystone.png
+++ b/doc/arch-design/figures/Multi-Site_shared_keystone.png
--- a/doc/arch-design/figures/Multi-Site_shared_keystone1.png
+++ b/doc/arch-design/figures/Multi-Site_shared_keystone1.png
--- a/doc/arch-design/figures/Multi-Site_shared_keystone_horizon.png
+++ b/doc/arch-design/figures/Multi-Site_shared_keystone_horizon.png
--- a/doc/arch-design/figures/Multi-Site_shared_keystone_horizon_swift.png
+++ b/doc/arch-design/figures/Multi-Site_shared_keystone_horizon_swift.png
--- a/doc/arch-design/figures/Multi-Site_shared_keystone_horizon_swift1.png
+++ b/doc/arch-design/figures/Multi-Site_shared_keystone_horizon_swift1.png
--- a/doc/arch-design/figures/Multi-site_Geo_Redundant_LB.png
+++ b/doc/arch-design/figures/Multi-site_Geo_Redundant_LB.png
--- a/doc/arch-design/figures/Network_Cloud_Storage1.png
+++ b/doc/arch-design/figures/Network_Cloud_Storage1.png
--- a/doc/arch-design/figures/Network_Cloud_Storage2.png
+++ b/doc/arch-design/figures/Network_Cloud_Storage2.png
--- a/doc/arch-design/figures/Network_Web_Services1.png
+++ b/doc/arch-design/figures/Network_Web_Services1.png
--- a/doc/arch-design/figures/OPST_0008_Compute_12015337_0314cd-compute_cells_high.png
+++ b/doc/arch-design/figures/OPST_0008_Compute_12015337_0314cd-compute_cells_high.png
--- a/doc/arch-design/figures/Special_case_SDN_external.png
+++ b/doc/arch-design/figures/Special_case_SDN_external.png
--- a/doc/arch-design/figures/Special_case_SDN_hosted.png
+++ b/doc/arch-design/figures/Special_case_SDN_hosted.png
--- a/doc/arch-design/figures/Specialized_Hardware2.png
+++ b/doc/arch-design/figures/Specialized_Hardware2.png
--- a/doc/arch-design/figures/Specialized_OOO.png
+++ b/doc/arch-design/figures/Specialized_OOO.png
--- a/doc/arch-design/figures/Specialized_VDI1.png
+++ b/doc/arch-design/figures/Specialized_VDI1.png
--- a/doc/arch-design/figures/Storage_Database_+_Object2.png
+++ b/doc/arch-design/figures/Storage_Database_+_Object2.png
--- a/doc/arch-design/figures/Storage_Database_+_Object3.png
+++ b/doc/arch-design/figures/Storage_Database_+_Object3.png
--- a/doc/arch-design/figures/Storage_Database_+_Object5.png
+++ b/doc/arch-design/figures/Storage_Database_+_Object5.png
--- a/doc/arch-design/figures/Storage_Hadoop.png
+++ b/doc/arch-design/figures/Storage_Hadoop.png
--- a/doc/arch-design/figures/Storage_Hadoop3.png
+++ b/doc/arch-design/figures/Storage_Hadoop3.png
--- a/doc/arch-design/figures/Storage_Object.png
+++ b/doc/arch-design/figures/Storage_Object.png
--- a/doc/arch-design/figures/arch-design.graffle
+++ b/doc/arch-design/figures/arch-design.graffle
--- a/doc/arch-design/figures/design-methodology.png
+++ b/doc/arch-design/figures/design-methodology.png
--- a/doc/arch-design/figures/openstack_fullcover2014_1.jpg
+++ b/doc/arch-design/figures/openstack_fullcover2014_1.jpg
--- a/doc/arch-design/figures/packingexample-2.png
+++ b/doc/arch-design/figures/packingexample-2.png
--- a/doc/arch-design/figures/region-example.png
+++ b/doc/arch-design/figures/region-example.png
--- a/doc/arch-design/generalpurpose/section_architecture_general_purpose.xml
+++ b/doc/arch-design/generalpurpose/section_architecture_general_purpose.xml
@ -1,720 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-guide-architecture-overview">
    <?dbhtml stop-chunking?>
    <title>Architecture</title>
    <para>Hardware selection involves three key areas:</para>
    <itemizedlist>
        <listitem>
            <para>Compute</para>
        </listitem>
        <listitem>
            <para>Network</para>
        </listitem>
        <listitem>
            <para>Storage</para>
        </listitem>
    </itemizedlist>
    <para>Hardware for a general purpose OpenStack cloud
        should reflect a cloud with no pre-defined usage model,
        designed to run a wide variety of applications with
        varying resource usage requirements.
        These applications include any of the following:</para>
      <itemizedlist>
        <listitem>
          <para>
            RAM-intensive
          </para>
        </listitem>
        <listitem>
          <para>
            CPU-intensive
          </para>
        </listitem>
        <listitem>
          <para>
            Storage-intensive
          </para>
        </listitem>
      </itemizedlist>
    <para>Certain hardware form factors may better suit a general
        purpose OpenStack cloud due to the requirement for equal (or
        nearly equal) balance of resources. Server hardware must provide
        the following:</para>
      <itemizedlist>
        <listitem>
          <para>
            Equal (or nearly equal) balance of compute capacity (RAM and CPU)
          </para>
        </listitem>
        <listitem>
          <para>
            Network capacity (number and speed of links)
          </para>
        </listitem>
        <listitem>
          <para>
            Storage capacity (gigabytes or terabytes as well as Input/Output
            Operations Per Second (<glossterm>IOPS</glossterm>))
          </para>
        </listitem>
      </itemizedlist>
    <para>Evaluate server hardware around four conflicting
      dimensions:</para>
    <variablelist>
      <varlistentry>
        <term>Server density</term>
        <listitem>
          <para>A measure of how many servers can
            fit into a given measure of physical space, such as a
            rack unit [U].</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Resource capacity</term>
        <listitem>
          <para>The number of CPU cores, amount of RAM,
             or amount of deliverable storage.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Expandability</term>
        <listitem>
          <para>Limit of additional resources you can add to
            a server.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Cost</term>
        <listitem>
          <para>The relative purchase price of the hardware
             weighted against the level of design effort needed to
             build the system.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>Increasing server density means sacrificing resource
        capacity or expandability; however, increasing resource
        capacity and expandability increases cost and decreases server
        density. As a result, determining the best server hardware for
        a general purpose OpenStack architecture means understanding
        how choice of form factor will impact the rest of the
        design. The following list outlines the form factors to
        choose from:</para>
    <itemizedlist>
        <listitem>
            <para>Blade servers typically support dual-socket
                multi-core CPUs. Blades also offer
                outstanding density.</para>
        </listitem>
        <listitem>
            <para>1U rack-mounted servers occupy only a single rack
                unit. Their benefits include high density, support for
                dual-socket multi-core CPUs, and support for
                reasonable RAM amounts. This form factor offers
                limited storage capacity, limited network capacity,
                and limited expandability.</para>
        </listitem>
        <listitem>
            <para>2U rack-mounted servers offer the expanded storage
                and networking capacity that 1U servers tend to lack,
                but with a corresponding decrease in server density
                (half the density offered by 1U rack-mounted
                servers).</para>
        </listitem>
        <listitem>
            <para>Larger rack-mounted servers, such as 4U servers,
                will tend to offer even greater CPU capacity, often
                supporting four or even eight CPU sockets. These
                servers often have much greater expandability so will
                provide the best option for upgradability. This means,
                however, that the servers have a much lower server
                density and a much greater hardware cost.</para>
        </listitem>
        <listitem>
            <para><emphasis>Sled servers</emphasis> are rack-mounted servers that support
                multiple independent servers in a single 2U or 3U
                enclosure. This form factor offers increased density
                over typical 1U-2U rack-mounted servers but tends to
                suffer from limitations in the amount of storage or
                network capacity each individual server
                supports.</para>
        </listitem>
    </itemizedlist>
    <para>The best form factor for server hardware
        supporting a general purpose OpenStack cloud is driven by
        outside business and cost factors. No single reference
        architecture applies to all implementations; the decision
        must flow from user requirements, technical
        considerations, and operational considerations. Here are some
        of the key factors that influence the selection of server
        hardware:</para>
    <variablelist>
      <varlistentry>
        <term>Instance density</term>
        <listitem>
            <para>Sizing is an important
                consideration for a general purpose OpenStack cloud.
                The expected or anticipated number of instances that
                each hypervisor can host is a common metric used in
                sizing the deployment. The selected server hardware
                needs to support the expected or anticipated instance
                density.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Host density</term>
        <listitem>
            <para>Physical data centers have limited
                physical space, power, and cooling. The number of
                hosts (or hypervisors) that can be fitted into a given
                metric (rack, rack unit, or floor tile) is another
                important method of sizing. Floor weight is an often
                overlooked consideration. The data center floor must
                be able to support the weight of the proposed number
                of hosts within a rack or set of racks. These factors
                need to be applied as part of the host density
                calculation and server hardware selection.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Power density</term>
        <listitem>
            <para>Data centers have a specified amount
                of power fed to a given rack or set of racks. Older
                data centers may have a power density as low
                as 20 amps per rack, while more recent data centers
                can be architected to support power densities as high
                as 120 amps per rack. The selected server hardware must
                take power density into account.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Network connectivity</term>
        <listitem>
            <para>The selected server hardware
                must have the appropriate number of network
                connections, as well as the right type of network
                connections, in order to support the proposed
                architecture. Ensure that, at a minimum, there are at
                least two diverse network connections coming into each
                rack.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>The selection of form factors or architectures affects the selection
        of server hardware. Ensure that the selected server hardware
        is configured to support enough storage capacity (or storage
        expandability) to match the requirements of the selected scale-out
        storage solution. Similarly, the network architecture impacts
        the server hardware selection and vice versa.</para>
    <section xml:id="selecting-storage-hardware">
        <title>Selecting storage hardware</title>
    <para>Determine storage hardware architecture by
        selecting a specific storage architecture. Determine the selection of
        storage architecture by evaluating possible solutions against the
        critical factors, the user requirements, technical
        considerations, and operational considerations.
        Incorporate the following factors into your storage architecture:</para>
    <variablelist>
      <varlistentry>
        <term>Cost</term>
        <listitem>
            <para>Storage can be a significant portion of the
                overall system cost. For an organization that is concerned
                with vendor support, a commercial storage solution is
                advisable, although it comes with a higher price
                tag. If initial capital expenditure requires
                minimization, designing a system based on commodity
                hardware would apply. The trade-off is potentially
                higher support costs and a greater risk of
                incompatibility and interoperability issues.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Scalability</term>
        <listitem>
            <para>Scalability, along with expandability, is a major
                consideration in a general purpose OpenStack cloud. It
                might be difficult to predict the final intended size
                of the implementation as there are no established
                usage patterns for a general purpose cloud. It might
                become necessary to expand the initial deployment in
                order to accommodate growth and user demand.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Expandability</term>
        <listitem>
            <para>Expandability is a major architecture factor for
                storage solutions in a general purpose OpenStack
                cloud. A storage solution that expands
                to 50&nbsp;PB is considered more expandable than a
                solution that only scales to 10&nbsp;PB. This metric
                is related to scalability, which is the measure of a
                solution's performance as it expands.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>Using a scale-out storage solution with direct-attached
        storage (DAS) in the servers is well suited for a general
        purpose OpenStack cloud. Cloud services requirements determine
        your choice of scale-out solution. You need to determine if
        a single, highly expandable and highly vertically scalable,
        centralized storage array is suitable for your design.
        After determining an approach, select the storage hardware
        based on these criteria.</para>
    <para>This list expands upon the potential impacts for including a
        particular storage architecture (and corresponding storage
        hardware) into the design for a general purpose OpenStack
        cloud:</para>
    <variablelist>
      <varlistentry>
        <term>Connectivity</term>
        <listitem>
            <para>Ensure that, if storage protocols
                other than Ethernet are part of the storage solution,
                the appropriate hardware has been selected.
                If a centralized storage array is selected, ensure
                that the hypervisor will be able to connect to that
                storage array for image storage.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Usage</term>
        <listitem>
            <para>How the particular storage architecture will
                be used is critical for determining the architecture.
                Some of the configurations that will influence the
                architecture include whether it will be used by the
                hypervisors for ephemeral instance storage or if
                OpenStack Object Storage will use it for object storage.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Instance and image locations</term>
        <listitem>
            <para>
              Where instances and images will be stored will influence
              the architecture.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Server hardware</term>
        <listitem>
            <para>If the solution is a scale-out
                storage architecture that includes DAS, it
                will affect the server hardware selection. This could
                ripple into the decisions that affect host density,
                instance density, power density, OS-hypervisor,
                management tools and others.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>A general purpose OpenStack cloud has multiple options.
        The key factors that will have an influence
        on selection of storage hardware for a general purpose
        OpenStack cloud are as follows:</para>
    <variablelist>
      <varlistentry>
        <term>Capacity</term>
        <listitem>
            <para>Hardware resources selected for the resource nodes
                should be capable of supporting enough storage for the
                cloud services. Defining the initial requirements and
                ensuring the design can support adding capacity is
                important. Hardware nodes selected for object storage
                should be capable of supporting a large number of inexpensive
                disks with no reliance on RAID controller cards.
                Hardware nodes selected for block storage should be capable
                of supporting high speed storage solutions and RAID controller
                cards to provide performance and redundancy to storage at a
                hardware level.
                Selecting hardware RAID controllers that automatically repair
                damaged arrays will assist with the replacement and repair of
                degraded or deleted storage devices.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Performance</term>
        <listitem>
            <para>Disks selected for object storage services do not need
               to be fast performing disks. We recommend that object storage
               nodes take advantage of the best cost per terabyte available
               for storage. Contrastingly, disks chosen for block storage
               services should take advantage of performance boosting
               features that may entail the use of SSDs or flash storage
               to provide high performance block storage pools. Storage
               performance of ephemeral disks used for instances should
               also be taken into consideration.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Fault tolerance</term>
        <listitem>
            <para>Object storage resource nodes have
                no requirements for hardware fault tolerance or RAID
                controllers. It is not necessary to plan for fault
                tolerance within the object storage hardware because
                the object storage service provides replication
                between zones as a feature of the service. Block
                storage nodes, compute nodes, and cloud controllers
                should all have fault tolerance built in at the
                hardware level by making use of hardware RAID
                controllers and varying levels of RAID configuration.
                The level of RAID chosen should be consistent with the
                performance and availability requirements of the
                cloud.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    </section>
    <section xml:id="selecting-networking-hardware">
        <title>Selecting networking hardware</title>
    <para>Selecting network architecture determines which network
        hardware will be used. Networking software is determined by
        the selected networking hardware.</para>
    <para>There are more subtle design impacts that need to be considered.
        The selection of certain networking hardware (and the networking software)
        affects the management tools that can be used. There are
        exceptions to this; the rise of <emphasis>open</emphasis> networking software
        that supports a range of networking hardware means that there
        are instances where the relationship between networking
        hardware and networking software is not as tightly defined.</para>
    <para>Some of the key considerations that should be included in
        the selection of networking hardware include:</para>
    <variablelist>
      <varlistentry>
        <term>Port count</term>
        <listitem>
            <para>The design will require networking
                hardware that has the requisite port count.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Port density</term>
        <listitem>
            <para>The network design will be affected by
                the physical space that is required to provide the
                requisite port count. A higher port density is preferred,
                as it leaves more rack space for compute or storage components
                that may be required by the design. This can also lead into
                concerns about fault domains and power density that
                should be considered. Higher density switches are more
                expensive and should also be considered, as it is
                important not to over design the network if it is not
                required.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Port speed</term>
        <listitem>
            <para>
              The networking hardware must support the proposed
              network speed, for example: 1&nbsp;GbE, 10&nbsp;GbE, or
              40&nbsp;GbE (or even 100&nbsp;GbE).</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Redundancy</term>
        <listitem>
            <para>The level of network hardware redundancy
                required is influenced by the user requirements for
                high availability and cost considerations. Network
                redundancy can be achieved by adding redundant power
                supplies or paired switches. If this is a requirement,
                the hardware will need to support this configuration.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Power requirements</term>
        <listitem>
            <para>Ensure that the physical data
                center provides the necessary power for the selected
                network hardware.</para>
              <note>
                <para>
                  This may be an issue for spine switches in a leaf and
                  spine fabric, or end of row (EoR) switches.</para>
              </note>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>There is no single best practice architecture for the
        networking hardware supporting a general purpose OpenStack
        cloud that will apply to all implementations. Some of the key
        factors that will have a strong influence on selection of
        networking hardware include:</para>
    <variablelist>
      <varlistentry>
        <term>Connectivity</term>
        <listitem>
            <para>All nodes within an OpenStack cloud
                require network connectivity. In some
                cases, nodes require access to more than one network
                segment. The design must encompass sufficient network
                capacity and bandwidth to ensure that all
                communications within the cloud, both north-south and
                east-west traffic, have sufficient resources
                available.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Scalability</term>
        <listitem>
            <para>The network design should
                encompass a physical and logical network design that
                can be easily expanded upon. Network hardware should
                offer the appropriate types of interfaces and speeds
                that are required by the hardware nodes.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Availability</term>
        <listitem>
            <para>To ensure that access to nodes within
                the cloud is not interrupted, we recommend that
                the network architecture identify any single points of
                failure and provide some level of redundancy or fault
                tolerance. With regard to the network infrastructure
                itself, this often involves use of networking
                protocols such as LACP, VRRP or others to achieve a
                highly available network connection. In addition, it
                is important to consider the networking implications
                on API availability. In order to ensure that the APIs,
                and potentially other services in the cloud are highly
                available, we recommend you design a load balancing
                solution within the network architecture to
                accommodate these requirements.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    </section>
    <section xml:id="software-selection">
        <title>Software selection</title>
    <para>Software selection for a general purpose OpenStack
        architecture design needs to include these three areas:</para>
    <itemizedlist>
        <listitem>
            <para>Operating system (OS) and hypervisor</para>
        </listitem>
        <listitem>
            <para>OpenStack components</para>
        </listitem>
        <listitem>
            <para>Supplemental software</para>
        </listitem>
    </itemizedlist>
    </section>
    <section xml:id="os-and-hypervisor">
      <title>Operating system and hypervisor</title>
    <para>The operating system (OS) and hypervisor have a
      significant impact on the overall design. Selecting a particular
      operating system and hypervisor can directly affect server
      hardware selection. Make sure the storage
      hardware and topology support the selected operating
      system and hypervisor combination. Also ensure the networking
      hardware selection and topology will work with the chosen operating
      system and hypervisor combination.</para>
    <para>Some areas that could be impacted by the selection of OS and
        hypervisor include:</para>
    <variablelist>
      <varlistentry>
        <term>Cost</term>
        <listitem>
            <para>Selecting a commercially supported hypervisor,
                such as Microsoft Hyper-V, will result in a different
                cost model than community-supported open source
                hypervisors including <glossterm
                baseform="kernel-based VM (KVM)">KVM</glossterm>
                or <glossterm>Xen</glossterm>. When
                comparing open source OS solutions, choosing Ubuntu
                over Red Hat (or vice versa) will have an impact on
                cost due to support contracts.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Supportability</term>
        <listitem>
            <para>Depending on the selected
                hypervisor, staff should have the appropriate
                training and knowledge to support the selected OS and
                hypervisor combination. If they do not, training will
                need to be provided which could have a cost impact on
                the design.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Management tools</term>
        <listitem>
            <para>The management tools used for
                Ubuntu and KVM differ from the management tools
                for VMware vSphere. Although both OS and hypervisor
                combinations are supported by OpenStack, there will be
                very different impacts to the rest of the design as a
                result of the selection of one combination versus the
                other.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Scale and performance</term>
        <listitem>
            <para>Ensure that selected OS and
                hypervisor combinations meet the appropriate scale and
                performance requirements. The chosen architecture will
                need to meet the targeted instance-host ratios with
                the selected OS-hypervisor combinations.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Security</term>
        <listitem>
            <para>Ensure that the design can accommodate
                regular periodic installations of application security
                patches while maintaining required workloads. The
                frequency of security patches for the proposed
                OS-hypervisor combination will have an impact on
                performance and the patch installation process could
                affect maintenance windows.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Supported features</term>
        <listitem>
            <para>Determine which features of OpenStack are required.
                This will often determine the selection of the OS-hypervisor combination.
                Some features are only available with specific operating systems or
                hypervisors.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Interoperability</term>
        <listitem>
            <para>You will need to consider how the OS and hypervisor combination
                interacts with other operating systems and hypervisors, including
                other software solutions.
                Operational troubleshooting tools for one OS-hypervisor
                combination may differ from the tools used for another OS-hypervisor
                combination and, as a result, the design will need to
                address if the two sets of tools need to interoperate.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    </section>
    <section xml:id="openstack-components">
        <title>OpenStack components</title>
    <para>Selecting which OpenStack components are included in the overall
        design is important. Some OpenStack components, like
        Compute and Image service, are required in every architecture. Other
        components, like Orchestration, are not always required.</para>
    <para>Excluding certain OpenStack components can limit or constrain
        the functionality of other components. For example, if the architecture includes
        Orchestration but excludes Telemetry, then the design will not be able
        to take advantage of Orchestration's auto scaling functionality.
        It is important to research the component interdependencies
        in conjunction with the technical requirements before deciding
        on the final architecture.</para>
    <section xml:id="networking-software">
      <title>Networking software</title>
    <para>OpenStack Networking (neutron) provides a wide variety of networking
        services for instances. There are many additional networking
        software packages that can be useful when managing OpenStack
        components. Some examples include:</para>
      <itemizedlist>
        <listitem>
          <para>
            Software to provide load balancing
          </para>
        </listitem>
        <listitem>
          <para>
            Network redundancy protocols
          </para>
        </listitem>
        <listitem>
          <para>
            Routing daemons
          </para>
        </listitem>
      </itemizedlist>
    <para>Some of these software packages are described
        in more detail in the <citetitle>OpenStack High Availability
        Guide</citetitle> (refer to the <link
        xlink:href="http://docs.openstack.org/ha-guide/networking-ha.html">Network
        controller cluster stack chapter</link> of the OpenStack High
        Availability Guide).</para>
    <para>For a general purpose OpenStack cloud, the OpenStack
        infrastructure components need to be highly available. If
        the design does not include hardware load balancing,
        networking software packages like HAProxy will need to be
        included.</para>
    </section>
    <section xml:id="management-software">
      <title>Management software</title>
    <para>The selected supplemental software solution
        affects the overall OpenStack cloud design. This includes
        software for providing clustering, logging, monitoring and
        alerting.</para>
    <para>Inclusion of clustering software, such as Corosync or
        Pacemaker, is determined primarily by the availability
        requirements. The impact of including (or not
        including) these software packages is primarily determined by
        the availability of the cloud infrastructure and the
        complexity of supporting the configuration after it is
        deployed. The <link xlink:href="http://docs.openstack.org/ha-guide/"><citetitle>OpenStack High Availability Guide</citetitle></link>
        provides more
        details on the installation and configuration of Corosync and
        Pacemaker, should these packages need to be included in the
        design.</para>
    <para>Requirements for logging, monitoring, and alerting are
        determined by operational considerations. Each of these
        sub-categories includes a number of various options.</para>
    <para>If these software packages are required, the
        design must account for the additional resource consumption
        (CPU, RAM, storage, and network bandwidth). Some other potential
        design impacts include:</para>
    <itemizedlist>
        <listitem>
            <para>OS-hypervisor combination: Ensure that the
                selected logging, monitoring, or alerting tools
                support the proposed OS-hypervisor combination.</para>
        </listitem>
        <listitem>
            <para>Network hardware: The network hardware selection
                needs to be supported by the logging, monitoring, and
                alerting software.</para>
        </listitem>
    </itemizedlist>
    </section>
    <section xml:id="database-software">
      <title>Database software</title>
    <para>OpenStack components often require access
        to back-end database services to store state and configuration
        information. Selecting an appropriate back-end database
        that satisfies the availability and fault tolerance
        requirements of the OpenStack services is required. OpenStack
        services support connecting to a database that is supported
        by the SQLAlchemy Python drivers; however, most common
        database deployments make use of MySQL or variations of it. We
        recommend that the database, which provides back-end
        service within a general purpose cloud, be made highly
        available when using an available technology which can
        accomplish that goal.</para>
    </section>
 </section>
 </section>
--- a/doc/arch-design/generalpurpose/section_operational_considerations_general_purpose.xml
+++ b/doc/arch-design/generalpurpose/section_operational_considerations_general_purpose.xml
@ -1,156 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="operational-considerations-general-purpose">
    <?dbhtml stop-chunking?>
    <title>Operational considerations</title>
    <para>In the planning and design phases of the build out, it is
        important to include the operations function. Operational
        factors affect the design choices for a general purpose cloud,
        and operations staff are often tasked with the maintenance of
        cloud environments for larger installations.</para>
    <para>Expectations set by the Service Level Agreements (SLAs) directly
        affect knowing when and where you should implement redundancy and
        high availability. SLAs are contractual
        obligations that provide assurances for service availability.
        They define the levels of availability that drive the technical
        design, often with penalties for not meeting contractual obligations.</para>
    <para>SLA terms that affect design include:</para>
    <itemizedlist>
        <listitem>
            <para>API availability guarantees implying multiple
                infrastructure services and highly available
                load balancers.</para>
        </listitem>
        <listitem>
            <para>Network uptime guarantees affecting switch
                design, which might require redundant switching and
                power.</para>
        </listitem>
        <listitem>
            <para>Factor in networking security policy requirements
                into your deployments.</para>
        </listitem>
    </itemizedlist>
    <section xml:id="support-and-maintainability-general-purpose">
      <title>Support and maintainability</title>
    <para>To be able to support and maintain an installation, OpenStack
        cloud management requires operations staff to understand and
        comprehend design architecture content. The operations and engineering
        staff skill level, and level of separation, are dependent on size and
        purpose of the installation. Large cloud service providers, or telecom
        providers, are more likely to be managed by specially trained, dedicated
        operations organizations. Smaller implementations are more likely to rely
        on support staff that need to take on combined engineering, design and
        operations functions.</para>
    <para>Maintaining OpenStack installations requires a
        variety of technical skills. You may want to consider using a third-party
        management company with special expertise in managing
        OpenStack deployment.</para>
    </section>
    <section xml:id="monitoring-general-purpose">
      <title>Monitoring</title>
    <para>OpenStack clouds require appropriate monitoring platforms to
        ensure errors are caught and managed appropriately. Specific
        meters that are critically important to monitor include:</para>
      <itemizedlist>
        <listitem>
          <para>
            Image disk utilization
          </para>
        </listitem>
        <listitem>
          <para>
            Response time to the Compute API
          </para>
        </listitem>
      </itemizedlist>
    <para>Leveraging existing monitoring systems is an effective check to
        ensure OpenStack environments can be monitored.</para>
    </section>
    <section xml:id="downtime-general-purpose">
      <title>Downtime</title>
    <para>To effectively run cloud installations, initial downtime planning
        includes creating processes and architectures that support
        the following:</para>
      <itemizedlist>
        <listitem>
          <para>
            Planned (maintenance)
          </para>
        </listitem>
        <listitem>
          <para>
            Unplanned (system faults)
          </para>
        </listitem>
      </itemizedlist>
    <para>Resiliency of the overall system and individual components is
        dictated by the requirements of the SLA, meaning designing
        for high availability (HA) can have cost ramifications.</para>
    </section>
    <section xml:id="capacity-planning">
      <title>Capacity planning</title>
    <para>Capacity constraints for a general purpose cloud environment
        include:</para>
      <itemizedlist>
       <listitem>
         <para>
          Compute limits
         </para>
       </listitem>
       <listitem>
         <para>
          Storage limits
         </para>
       </listitem>
     </itemizedlist>
   <para>A relationship exists between the size of the compute environment
        and the supporting OpenStack infrastructure controller nodes requiring
        support.</para>
   <para>Increasing the size of the supporting compute environment increases
        the network traffic and messages, adding load to the controller or
        networking nodes. Effective monitoring of the environment will help
        with capacity decisions on scaling.</para>
   <para>Compute nodes automatically attach to OpenStack clouds, resulting in
        a horizontally scaling process when adding extra compute capacity to an
        OpenStack cloud. Additional processes are required to place nodes into
        appropriate availability zones and host aggregates. When adding additional
        compute nodes to environments, ensure identical or functionally compatible
        CPUs are used, otherwise live migration features will break. It is necessary
        to add rack capacity or network switches as scaling out compute hosts directly
        affects network and datacenter resources.</para>
   <para>Assessing the average workloads and increasing the number of instances
        that can run within the compute environment by adjusting the overcommit
        ratio is another option. It is important to remember that changing the CPU overcommit
        ratio can have a detrimental effect and cause a potential increase in
        noisy neighbor activity. The additional risk of increasing the overcommit ratio is
        more instances failing when a compute host fails.</para>
    <para>Compute host components can also be upgraded to account for
        increases in demand; this is known as vertical scaling.
        Upgrading CPUs with more cores, or increasing the overall
        server memory, can add extra needed capacity depending on
        whether the running applications are more CPU intensive or
        memory intensive.</para>
    <para>Insufficient disk capacity could also have a negative effect
        on overall performance including CPU and memory usage.
        Depending on the back-end architecture of the OpenStack Block
        Storage layer, capacity includes adding disk shelves to
        enterprise storage systems or installing additional block
        storage nodes. Upgrading directly attached storage installed in
        compute hosts, and adding capacity to the shared storage for
        additional ephemeral storage to instances, may be necessary.</para>
    <para>
      For a deeper discussion on many of these topics, refer to the
      <link
      xlink:href="http://docs.openstack.org/ops"><citetitle>OpenStack
      Operations Guide</citetitle></link>.
    </para>
    </section>
 </section>
--- a/doc/arch-design/generalpurpose/section_prescriptive_example_general_purpose.xml
+++ b/doc/arch-design/generalpurpose/section_prescriptive_example_general_purpose.xml
@ -1,101 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="prescriptive-example-online-classifieds">
    <?dbhtml stop-chunking?>
    <title>Prescriptive example</title>
    <para>An online classified advertising company wants to run web applications
        consisting of Tomcat, Nginx and MariaDB in a private cloud. To be able
        to meet policy requirements, the cloud infrastructure will run in their
        own data center. The company has predictable load requirements, but requires
        scaling to cope with nightly increases in demand. Their current environment
        does not have the flexibility to align with their goal of running an open
        source API environment. The current environment consists of the following:</para>
    <itemizedlist>
        <listitem>
            <para>Between 120 and 140 installations of Nginx and
                Tomcat, each with 2 vCPUs and 4 GB of RAM</para>
        </listitem>
        <listitem>
            <para>A three-node MariaDB and Galera cluster, each with 4
                vCPUs and 8 GB RAM</para>
        </listitem>
    </itemizedlist>
    <para>The company runs hardware load balancers and multiple web
        applications serving their websites, and orchestrates environments
        using combinations of scripts and Puppet. The website generates large amounts of
        log data daily that requires archiving.</para>
    <para>The solution would consist of the following OpenStack
        components:</para>
    <itemizedlist>
        <listitem>
            <para>A firewall, switches and load balancers on the
                public facing network connections.</para>
        </listitem>
        <listitem>
            <para>OpenStack Controller service running Image,
                Identity, Networking, combined with support services such as
                MariaDB and RabbitMQ, configured for high availability on at
                least three controller nodes.</para>
        </listitem>
        <listitem>
            <para>OpenStack Compute nodes running the KVM
                hypervisor.</para>
        </listitem>
        <listitem>
            <para>OpenStack Block Storage for use by compute instances
            requiring persistent storage (such as databases for
                dynamic sites).</para>
        </listitem>
        <listitem>
            <para>OpenStack Object Storage for serving static objects
                (such as images).</para>
        </listitem>
    </itemizedlist>
    <mediaobject><imageobject><imagedata contentwidth="4in"
                    fileref="../figures/General_Architecture3.png"
                /></imageobject></mediaobject>
    <para>Running up to 140
        web instances and the small number of MariaDB instances
        requires 292 vCPUs available, as well as 584 GB RAM. On a
        typical 1U server using dual-socket hex-core Intel CPUs with
        Hyperthreading, and assuming 2:1 CPU overcommit ratio, this
        would require 8 OpenStack Compute nodes.</para>
    <para>The web application instances run from local storage on each
        of the OpenStack Compute nodes. The web application instances
        are stateless, meaning that any of the instances can fail and
        the application will continue to function.</para>
    <para>MariaDB server instances store their data on shared
        enterprise storage, such as NetApp or Solidfire devices. If a
        MariaDB instance fails, storage would be expected to be
        re-attached to another instance and rejoined to the Galera
        cluster.</para>
    <para>Logs from the web application servers are shipped to
        OpenStack Object Storage for processing and
        archiving.</para>
    <para>Additional capabilities can be realized by
        moving static web content to be served from OpenStack Object
        Storage containers, and backing the OpenStack Image service
        with OpenStack Object Storage.</para>
      <note>
        <para>
          Increasing OpenStack Object Storage means network bandwidth
          needs to be taken into consideration. Running OpenStack Object
          Storage with network connections offering 10 GbE or better connectivity
          is advised.
        </para>
      </note>
    <para>Leveraging Orchestration and Telemetry services is also a potential issue when
        providing auto-scaling, orchestrated web application environments.
        Defining the web applications in <glossterm
        baseform="Heat Orchestration Template (HOT)">Heat Orchestration Templates (HOT)</glossterm>
        negates the reliance on the current scripted Puppet solution.</para>
    <para>OpenStack Networking can be used to control hardware load
        balancers through the use of plug-ins and the Networking API.
        This allows users to control hardware load balance pools
        and instances as members in these pools, but their use in
        production environments must be carefully weighed against
        current stability.</para>
 </section>
--- a/doc/arch-design/generalpurpose/section_tech_considerations_general_purpose.xml
+++ b/doc/arch-design/generalpurpose/section_tech_considerations_general_purpose.xml
@ -1,738 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="technical-considerations-general-purpose">
    <?dbhtml stop-chunking?>
    <title>Technical considerations</title>
    <para>General purpose clouds are expected to
      include these base services:</para>
      <itemizedlist>
        <listitem>
          <para>
            Compute
          </para>
        </listitem>
        <listitem>
          <para>
            Network
          </para>
        </listitem>
        <listitem>
          <para>
            Storage
          </para>
        </listitem>
      </itemizedlist>
    <para>Each of these services has different resource requirements.
        As a result, you must make design decisions relating directly
        to the service, as well as provide a balanced infrastructure for
        all services.</para>
    <para>Take into consideration the unique aspects of each service, as
      individual characteristics and service mass can impact the hardware
      selection process. Hardware designs should be generated for each of the
      services.</para>
    <para>Hardware decisions are also made in relation to network architecture
        and facilities planning. These factors play heavily into
        the overall architecture of an OpenStack cloud.</para>
    <section xml:id="designing-compute-resources-tech-considerations">
      <title>Compute resource design</title>
      <para>When designing compute resource pools, a number of factors
        can impact your design decisions. Factors such as number of processors,
        amount of memory, and the quantity of storage required for each hypervisor
        must be taken into account.</para>
    <para>You will also need to decide whether to provide compute resources
        in a single pool or in multiple pools. In most cases, multiple pools
        of resources can be allocated and addressed on demand. A compute design
        that allocates multiple pools of resources makes best use of application
        resources, and is commonly referred to as
    <firstterm>bin packing</firstterm>.</para>
    <para>In a bin packing design, each independent resource pool provides service
        for specific flavors. This helps to ensure that, as instances are scheduled
        onto compute hypervisors, each independent node's resources will be allocated
        in a way that makes the most efficient use of the available hardware. Bin
        packing also requires a common hardware design, with all hardware nodes within
        a compute resource pool sharing a common processor, memory, and storage layout.
        This makes it easier to deploy, support, and maintain nodes throughout their
        life cycle.</para>
    <para>An <firstterm>overcommit ratio</firstterm> is the ratio of available
        virtual resources to available physical resources. This ratio is
        configurable for CPU and memory. The default CPU overcommit ratio is 16:1, and
        the default memory overcommit ratio is 1.5:1. Determining the tuning of the
        overcommit ratios during the design phase is important as it has a direct
        impact on the hardware layout of your compute nodes.</para>
    <para>When selecting a processor, compare features and performance
        characteristics. Some processors include features specific to virtualized
        compute hosts, such as hardware-assisted virtualization, and technology
        related to memory paging (also known as EPT shadowing). These types of features
        can have a significant impact on the performance of your virtual machine.</para>
    <para>You will also need to consider the compute requirements of non-hypervisor
        nodes (sometimes referred to as resource nodes). This includes controller, object
        storage, and block storage nodes, and networking services.</para>
    <para>The number of processor cores and threads impacts the number of worker
        threads which can be run on a resource node. Design decisions must relate
        directly to the service being run on it, as well as provide a balanced
        infrastructure for all services.</para>
    <para>Workload can be unpredictable in a general purpose cloud, so consider
        including the ability to add additional compute resource pools on demand.
        In some cases, however, the demand for certain instance types or flavors may not
        justify individual hardware design. In either case, start by allocating
        hardware designs that are capable of servicing the most common instance
        requests. If you want to add additional hardware to the overall architecture,
        this can be done later.</para>
    </section>
    <section xml:id="designing-network-resources-tech-considerations">
      <title>Designing network resources</title>
      <para>OpenStack clouds generally have multiple network segments, with
        each segment providing access to particular resources. The network services
        themselves also require network communication paths which should
        be separated from the other networks. When designing network services
        for a general purpose cloud, plan for either a physical or logical
        separation of network segments used by operators and tenants. You can also
        create an additional network segment for access to internal services such as
        the message bus and database used by various services. Segregating these
        services onto separate networks helps to protect sensitive data and protects
        against unauthorized access to services.</para>
    <para>Choose a networking service based on the requirements of your instances.
       The architecture and design of your cloud will impact whether you choose
       OpenStack Networking (neutron) or legacy networking (nova-network).</para>
      <variablelist>
        <varlistentry>
          <term>Legacy networking (nova-network)</term>
            <listitem>
              <para>The legacy networking (nova-network) service is primarily a
                  layer-2 networking service that functions in two modes, which
                  use VLANs in different ways. In a flat network mode, all
                  network hardware nodes and devices throughout the cloud are connected
                  to a single layer-2 network segment that provides access to
                  application data.</para>
              <para>When the network devices in the cloud support segmentation
                  using VLANs, legacy networking can operate in the second mode. In
                  this design model, each tenant within the cloud is assigned a
                  network subnet which is mapped to a VLAN on the physical
                  network. It is especially important to remember the maximum
                  number of 4096 VLANs which can be used within a spanning tree
                  domain. This places a hard limit on the amount of
                  growth possible within the data center. When designing a
                  general purpose cloud intended to support multiple tenants, we
                  recommend the use of legacy networking with VLANs, and
                  not in flat network mode.</para>
            </listitem>
        </varlistentry>
      </variablelist>
    <para>Another consideration regarding network is the fact that
        legacy networking is entirely managed by the cloud operator;
        tenants do not have control over network resources. If tenants
        require the ability to manage and create network resources
        such as network segments and subnets, it will be necessary to
        install the OpenStack Networking service to provide network
        access to instances.</para>
      <variablelist>
        <varlistentry>
          <term>OpenStack Networking (neutron)</term>
            <listitem>
              <para>OpenStack Networking (neutron) is a first class networking
                  service that gives full control over creation of virtual
                  network resources to tenants. This is often accomplished in
                  the form of tunneling protocols which will establish
                  encapsulated communication paths over existing network
                  infrastructure in order to segment tenant traffic. These
                  methods vary depending on the specific implementation, but
                  some of the more common methods include tunneling over GRE,
                  encapsulating with VXLAN, and VLAN tags.</para>
            </listitem>
        </varlistentry>
      </variablelist>
    <para>We recommend you design at least three network segments:</para>
    <itemizedlist>
      <listitem>
        <para>The first segment is a public network, used for access to REST APIs
          by tenants and operators. The controller nodes and swift
          proxies are the only devices connecting to this network segment. In some
          cases, this network might also be serviced by hardware load balancers
          and other network devices.</para>
      </listitem>
      <listitem>
        <para>The second segment is used by administrators to manage hardware resources.
          Configuration management tools also use this for deploying software and
          services onto new hardware. In some cases, this network segment might also be
          used for internal services, including the message bus and database services.
          This network needs to communicate with every hardware node.
          Due to the highly sensitive nature of this network segment, you also need to
          secure this network from unauthorized access.</para>
      </listitem>
      <listitem>
        <para>The third network segment is used by applications and consumers to access
          the physical network, and for users to access applications. This network is
          segregated from the one used to access the cloud APIs and is not
          capable of communicating directly with the hardware resources in the cloud.
          Compute resource nodes and network gateway services which allow application
          data to access the physical network from outside of the cloud need to
          communicate on this network segment.</para>
      </listitem>
    </itemizedlist>
    </section>
    <section xml:id="designing-openstack-object-storage-tech-considerations">
        <title>Designing OpenStack Object Storage</title>
    <para>When designing hardware resources for OpenStack Object
        Storage, the primary goal is to maximize the amount of storage
        in each resource node while also ensuring that the cost per
        terabyte is kept to a minimum. This often involves utilizing
        servers which can hold a large number of spinning disks.
        Whether choosing to use 2U server form factors with directly
        attached storage or an external chassis that holds a larger
        number of drives, the main goal is to maximize the storage
        available in each node.</para>
        <note>
          <para>We do not recommend investing in enterprise class drives
           for an OpenStack Object Storage cluster. The consistency and
           partition tolerance characteristics of OpenStack Object
           Storage ensure that data stays up to date and survives
           hardware faults without the use of any specialized data
           replication devices.</para>
        </note>
    <para>One of the benefits of OpenStack Object Storage is the ability
        to mix and match drives by making use of weighting within the
        swift ring. When designing your swift storage cluster, we
        recommend making use of the most cost effective storage
        solution available at the time.</para>
    <para>To achieve durability and availability of data stored as objects
        it is important to design object storage resource pools to ensure they can
        provide the suggested availability. Considering rack-level and zone-level
        designs to accommodate the number of replicas configured to be stored in the
        Object Storage service (the default number of replicas is three) is important
        when designing beyond the hardware node level. Each replica of
        data should exist in its own availability zone with its own
        power, cooling, and network resources available to service
        that specific zone.</para>
    <para>Object storage nodes should be designed so that the number
        of requests does not hinder the performance of the cluster.
        The object storage service is a chatty protocol, therefore
        making use of multiple processors that have higher core counts
        will ensure the IO requests do not inundate the server.</para>
    </section>
    <section xml:id="designing-openstack-block-storage">
      <title>Designing OpenStack Block Storage</title>
    <para>When designing OpenStack Block Storage resource nodes, it is
        helpful to understand the workloads and requirements that will
        drive the use of block storage in the cloud. We recommend designing
        block storage pools so that tenants can choose appropriate storage
        solutions for their applications. By creating multiple storage pools of different
        types, in conjunction with configuring an advanced storage
        scheduler for the block storage service, it is possible to
        provide tenants with a large catalog of storage services with
        a variety of performance levels and redundancy options.</para>
    <para>Block storage also takes advantage of a number of enterprise storage
        solutions. These are addressed via a plug-in driver developed by the
        hardware vendor. A large number of
        enterprise storage plug-in drivers ship out-of-the-box with
        OpenStack Block Storage (and many more are available via
        third-party channels). General purpose clouds are more likely to use
        directly attached storage in the majority of block storage nodes,
        deeming it necessary to provide additional levels of service to tenants
        which can only be provided by enterprise class storage solutions.</para>
    <para>Redundancy and availability requirements impact the decision to use
        a RAID controller card in block storage nodes. The input-output per second (IOPS)
        demand of your application will influence whether or not you should use a RAID
        controller, and which level of RAID is required.
        Making use of higher performing RAID volumes is suggested when
        considering performance. However, where redundancy of
        block storage volumes is more important we recommend
        making use of a redundant RAID configuration such as RAID 5 or
        RAID 6. Some specialized features, such as automated
        replication of block storage volumes, may require the use of
        third-party plug-ins and enterprise block storage solutions in
        order to provide the high demand on storage. Furthermore,
        where extreme performance is a requirement it may also be
        necessary to make use of high speed SSD disk drives or high
        performing flash storage solutions.</para>
    </section>
    <section xml:id="software-selection-tech-considerations">
        <title>Software selection</title>
    <para>The software selection process plays a large role in the
        architecture of a general purpose cloud. The following have
        a large impact on the design of the cloud:</para>
      <itemizedlist>
        <listitem>
          <para>
            Choice of operating system
          </para>
        </listitem>
        <listitem>
          <para>
            Selection of OpenStack software components
          </para>
        </listitem>
        <listitem>
          <para>
            Choice of hypervisor
          </para>
        </listitem>
        <listitem>
          <para>
            Selection of supplemental software
          </para>
        </listitem>
      </itemizedlist>
    <para>Operating system (OS) selection plays a large role in the
        design and architecture of a cloud. There are a number of OSes
        which have native support for OpenStack including:</para>
      <itemizedlist>
        <listitem>
          <para>
            Ubuntu
          </para>
        </listitem>
        <listitem>
          <para>
            Red Hat Enterprise Linux (RHEL)
          </para>
        </listitem>
        <listitem>
          <para>
            CentOS
          </para>
        </listitem>
        <listitem>
          <para>
            SUSE Linux Enterprise Server (SLES)
          </para>
        </listitem>
      </itemizedlist>
      <note>
        <para>Native support is not a constraint on the choice of OS; users are
        free to choose just about any Linux distribution (or even
        Microsoft Windows) and install OpenStack directly from source
        (or compile their own packages). However, many organizations will
        prefer to install OpenStack from distribution-supplied packages or
        repositories (although using the distribution vendor's OpenStack
        packages might be a requirement for support).
        </para>
      </note>
    <para>OS selection also directly influences hypervisor selection.
        A cloud architect who selects Ubuntu, RHEL, or SLES has some
        flexibility in hypervisor; KVM, Xen, and LXC are supported
        virtualization methods available under OpenStack Compute
        (nova) on these Linux distributions. However, a cloud architect
        who selects Hyper-V is limited to Windows Servers. Similarly, a
        cloud architect who selects XenServer is limited to the CentOS-based
        dom0 operating system provided with XenServer.</para>
    <para>The primary factors that play into OS-hypervisor selection
        include:</para>
    <variablelist>
        <varlistentry>
          <term>User requirements</term>
          <listitem>
            <para>The selection of OS-hypervisor
                combination first and foremost needs to support the
                user requirements.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Support</term>
          <listitem>
            <para>The selected OS-hypervisor combination
                needs to be supported by OpenStack.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Interoperability</term>
          <listitem>
            <para>The OS-hypervisor needs to be
                interoperable with other features and services in the
                OpenStack design in order to meet the user
                requirements.</para>
          </listitem>
        </varlistentry>
    </variablelist>
    </section>
    <section xml:id="hypervisor-tech-considerations">
      <title>Hypervisor</title>
    <para>OpenStack supports a wide variety of hypervisors, one or
        more of which can be used in a single cloud. These hypervisors
        include:</para>
    <itemizedlist>
        <listitem>
            <para>KVM (and QEMU)</para>
        </listitem>
        <listitem>
            <para>XCP/XenServer</para>
        </listitem>
        <listitem>
            <para>vSphere (vCenter and ESXi)</para>
        </listitem>
        <listitem>
            <para>Hyper-V</para>
        </listitem>
        <listitem>
            <para>LXC</para>
        </listitem>
        <listitem>
            <para>Docker</para>
        </listitem>
        <listitem>
            <para>Bare-metal</para>
        </listitem>
    </itemizedlist>
    <para>A complete list of supported hypervisors and their
        capabilities can be found at
        <link xlink:href="https://wiki.openstack.org/wiki/HypervisorSupportMatrix">OpenStack Hypervisor Support Matrix</link>.
    </para>
    <para>We recommend general purpose clouds use hypervisors that
        support the most general purpose use cases, such as KVM and
        Xen. More specific hypervisors should be chosen to account
        for specific functionality or a supported feature requirement.
        In some cases, there may also be a mandated
        requirement to run software on a certified hypervisor
        including solutions from VMware, Microsoft, and Citrix.</para>
    <para>The features offered through the OpenStack cloud platform
        determine the best choice of a hypervisor. Each hypervisor
        has its own hardware requirements, which may affect the decisions
        around designing a general purpose cloud.</para>
    <para>In a mixed hypervisor environment, specific aggregates of
        compute resources, each with defined capabilities, enable
        workloads to utilize software and hardware specific to their
        particular requirements. This functionality can be exposed
        explicitly to the end user, or accessed through defined
        metadata within a particular flavor of an instance.</para>
    </section>
    <section xml:id="openstack-components-tech-considerations">
      <title>OpenStack components</title>
    <para>A general purpose OpenStack cloud design should incorporate
        the core OpenStack services to provide a wide range of
        services to end-users. The OpenStack core services recommended
        in a general purpose cloud are:</para>
    <itemizedlist>
        <listitem>
            <para>OpenStack <glossterm>Compute</glossterm>
            (<glossterm>nova</glossterm>)</para>
        </listitem>
        <listitem>
            <para>OpenStack <glossterm>Networking</glossterm>
            (<glossterm>neutron</glossterm>)</para>
        </listitem>
        <listitem>
            <para>OpenStack <glossterm>Image service</glossterm>
            (<glossterm>glance</glossterm>)</para>
        </listitem>
        <listitem>
            <para>OpenStack <glossterm>Identity</glossterm>
            (<glossterm>keystone</glossterm>)</para>
        </listitem>
        <listitem>
            <para>OpenStack <glossterm>dashboard</glossterm>
            (<glossterm>horizon</glossterm>)</para>
        </listitem>
        <listitem>
            <para><glossterm>Telemetry</glossterm>
            (<glossterm>ceilometer</glossterm>)</para>
        </listitem>
    </itemizedlist>
    <para>A general purpose cloud may also include OpenStack
        <glossterm>Object Storage</glossterm> (<glossterm>swift</glossterm>)
        and OpenStack <glossterm>Block Storage</glossterm>
        (<glossterm>cinder</glossterm>). These may be
        selected to provide storage to applications and
        instances.</para>
    </section>
    <section xml:id="supplemental-software-tech-considerations">
      <title>Supplemental software</title>
    <para>A general purpose OpenStack deployment consists of more than
        just OpenStack-specific components. A typical deployment
        involves services that provide supporting functionality,
        including databases and message queues, and may also involve
        software to provide high availability of the OpenStack
        environment. Design decisions around the underlying message
        queue might affect the required number of controller services,
        as well as the technology to provide highly resilient database
        functionality, such as MariaDB with Galera. In such a
        scenario, replication of services relies on quorum.</para>
    <para>Where many general purpose deployments use hardware load
        balancers to provide highly available API access and SSL
        termination, software solutions, for example HAProxy, can also
        be considered. It is vital to ensure that such software
        implementations are also made highly available. High
        availability can be achieved by using software such as
        Keepalived or Pacemaker with Corosync. Pacemaker and Corosync
        can provide active-active or active-passive highly available
        configuration depending on the specific service in the
        OpenStack environment. Using this software can affect the
        design as it assumes at least a 2-node controller
        infrastructure where one of those nodes may be running certain
        services in standby mode.</para>
    <para>Memcached is a distributed memory object caching system, and
        Redis is a key-value store. Both are deployed on
        general purpose clouds to assist in alleviating load on the
        Identity service. The memcached service caches tokens, and due
        to its distributed nature it can help alleviate some
        bottlenecks to the underlying authentication system. Using
        memcached or Redis does not affect the overall design of your
        architecture as they tend to be deployed onto the
        infrastructure nodes providing the OpenStack services.</para>
    </section>
    <section xml:id="controller-infrastructure-tech-considerations">
        <title>Controller infrastructure</title>
    <para>The Controller infrastructure nodes provide management
        services to the end-user as well as providing services
        internally for the operating of the cloud. The Controllers
        run message queuing services that carry system
        messages between each service. Performance issues related to
        the message bus would lead to delays in delivering those
        messages to their destinations. The result of this condition would be
        delays in operation functions such as spinning up and deleting
        instances, provisioning new storage volumes and managing
        network resources. Such delays could adversely affect an
        application’s ability to react to certain conditions,
        especially when using auto-scaling features. It is important
        to properly design the hardware used to run the controller
        infrastructure as outlined above in the Hardware Selection
        section.</para>
    <para>Performance of the controller services is not limited
        to processing power, but restrictions may emerge in serving
        concurrent users. Ensure that the APIs and Horizon services
        are load tested to ensure that you are able to serve your
        customers. Particular attention should be paid to the
        OpenStack Identity Service (Keystone), which provides the
        authentication and authorization for all services, both
        internally to OpenStack itself and to end-users. This service
        can lead to a degradation of overall performance if this is
        not sized appropriately.</para>
    </section>
    <section xml:id="network-performance-tech-considerations">
      <title>Network performance</title>
    <para>In a general purpose OpenStack cloud, the requirements of
        the network help determine performance capabilities.
        It is possible to design OpenStack
        environments that run a mix of networking capabilities. By
        utilizing the different interface speeds, the users of the
        OpenStack environment can choose networks that are fit for
        their purpose.</para>
    <para>Network performance can be boosted considerably by
        implementing hardware load balancers to provide front-end
        service to the cloud APIs. The hardware load balancers also
        perform SSL termination if that is a requirement of your
        environment. When implementing SSL offloading, it is important
        to understand the SSL offloading capabilities of the devices
        selected.</para>
    </section>
    <section xml:id="compute-host-tech-considerations">
      <title>Compute host</title>
    <para>The choice of hardware specifications used in compute nodes
        including CPU, memory and disk type directly affects the
        performance of the instances. Other factors which can directly
        affect performance include tunable parameters within the
        OpenStack services, for example the overcommit ratio applied
        to resources. The defaults in OpenStack Compute set a 16:1
        overcommit of the CPU and a 1.5:1 overcommit of the memory.
        Running at such high ratios leads to an increase in
        "noisy-neighbor" activity. Care must be taken when sizing your
        Compute environment to avoid this scenario. For running
        general purpose OpenStack environments it is possible to keep
        to the defaults, but make sure to monitor your environment as
        usage increases.</para>
    </section>
    <section xml:id="storage-performance-tech-considerations">
      <title>Storage performance</title>
    <para>When considering performance of OpenStack Block Storage,
        hardware and architecture choice is important. Block Storage
        can use enterprise back-end systems such as NetApp or EMC,
        scale out storage such as GlusterFS and Ceph, or simply use
        the capabilities of directly attached storage in the nodes
        themselves. Block Storage may be deployed so that traffic
        traverses the host network, which could affect, and be
        adversely affected by, the front-side API traffic performance.
        As such, consider using a dedicated data storage network with
        dedicated interfaces on the Controller and Compute
        hosts.</para>
    <para>When considering performance of OpenStack Object Storage, a
        number of design choices will affect performance. A user’s
        access to the Object Storage is through the proxy services,
        which sit behind hardware load balancers. By the
        very nature of a highly resilient storage system, replication
        of the data would affect performance of the overall system. In
        this case, 10 GbE (or better) networking is recommended
        throughout the storage network architecture.</para>
    </section>
    <section xml:id="availability-tech-considerations">
      <title>Availability</title>
    <para>In OpenStack, the infrastructure is integral to providing
        services and should always be available, especially when
        operating with SLAs. Ensuring network availability is
        accomplished by designing the network architecture so that no
        single point of failure exists. A consideration of the number
        of switches, routes and redundancies of power should be
        factored into core infrastructure, as well as the associated
        bonding of networks to provide diverse routes to your highly
        available switch infrastructure.</para>
    <para>The OpenStack services themselves should be deployed across
        multiple servers that do not represent a single point of
        failure. Ensuring API availability can be achieved by placing
        these services behind highly available load balancers that
        have multiple OpenStack servers as members.</para>
    <para>OpenStack lends itself to deployment in a highly available
        manner where it is expected that at least 2 servers be
        utilized. These can run all the services involved, from the
        message queuing service, for example RabbitMQ or Qpid, to an
        appropriately deployed database service such as MySQL or
        MariaDB. As services in the cloud are scaled out, back-end
        services will need to scale too. Monitoring and reporting on
        server utilization and response times, as well as load testing
        your systems, will help determine scale out decisions.</para>
    <para>Care must be taken when deciding network functionality.
        Currently, OpenStack supports both the legacy networking (nova-network)
        system and the newer, extensible OpenStack Networking (neutron). Both
        have their pros and cons when it comes to providing highly
        available access. Legacy networking, which provides networking
        access maintained in the OpenStack Compute code, provides a
        feature that removes a single point of failure when it comes
        to routing, and this feature is currently missing in OpenStack
        Networking. Legacy networking’s multi-host
        functionality restricts the failure domain to the host running
        the affected instance.</para>
    <para>When using OpenStack Networking, the
        OpenStack controller servers or separate Networking
        hosts handle routing. For a deployment that requires features
        available in only Networking, it is possible to
        remove this restriction by using third party software that
        helps maintain highly available L3 routes. Doing so allows for
        common APIs to control network hardware, or to provide complex
        multi-tier web applications in a secure manner. It is also
        possible to completely remove routing from
        Networking, and instead rely on hardware routing capabilities.
        In this case, the switching infrastructure must support L3
        routing.</para>
     <para>OpenStack Networking and legacy networking
       both have their advantages and
       disadvantages. They are both valid and supported options that
       fit different network deployment models described in the
       <citetitle><link
       xlink:href="http://docs.openstack.org/openstack-ops/content/network_design.html#network_deployment_options"
       >OpenStack Operations Guide</link></citetitle>.</para>
    <para>Ensure your deployment has adequate back-up capabilities.</para>
    <para>Application design must also be factored into the
        capabilities of the underlying cloud infrastructure. If the
        compute hosts do not provide a seamless live migration
        capability, then it must be expected that when a compute host
        fails, that instance and any data local to that instance will
        be deleted. However, when providing an expectation to users
        that instances have a high level of uptime guarantee, the
        infrastructure must be deployed in a way that eliminates any
        single point of failure when a compute host disappears. This
        may include utilizing shared file systems on enterprise
        storage or OpenStack Block Storage to provide a level of
        guarantee to match service features.</para>
    <para>For more information on high availability in OpenStack, see the <link
      xlink:href="http://docs.openstack.org/ha-guide/"><citetitle>OpenStack
      High Availability Guide</citetitle></link>.
        </para>
    </section>
    <section xml:id="security-tech-considerations">
      <title>Security</title>
    <para>A security domain comprises users, applications, servers or
        networks that share common trust requirements and expectations
        within a system. Typically they have the same authentication
        and authorization requirements and users.</para>
    <para>These security domains are:</para>
    <itemizedlist>
        <listitem>
            <para>Public</para>
        </listitem>
        <listitem>
            <para>Guest</para>
        </listitem>
        <listitem>
            <para>Management</para>
        </listitem>
        <listitem>
            <para>Data</para>
        </listitem>
    </itemizedlist>
    <para>These security domains can be mapped to an OpenStack
        deployment individually, or combined. In each case, the cloud operator
        should be aware of the appropriate security concerns. Security
        domains should be mapped out against your specific OpenStack
        deployment topology. The domains and their trust requirements
        depend upon whether the cloud instance is public, private, or
        hybrid.</para>
      <itemizedlist>
        <listitem>
          <para>The public security domain is an entirely untrusted area of
           the cloud infrastructure. It can refer to the internet as a
           whole or simply to networks over which you have no authority.
           This domain should always be considered untrusted.</para>
        </listitem>
        <listitem>
          <para>The guest security domain handles compute data generated by
           instances on the cloud but not services that support the
           operation of the cloud, such as API calls. Public cloud
           providers and private cloud providers who do not have
           stringent controls on instance use or who allow unrestricted
           internet access to instances should consider this domain to be
           untrusted. Private cloud providers may want to consider this
           network as internal and therefore trusted only if they have
           controls in place to assert that they trust instances and all
           their tenants.</para>
        </listitem>
        <listitem>
          <para>The management security domain is where services interact.
           Sometimes referred to as the <emphasis>control plane</emphasis>, the networks
           in this domain transport confidential data such as configuration
           parameters, user names, and passwords. In most deployments this
           domain is considered trusted.</para>
        </listitem>
        <listitem>
          <para>The data security domain is concerned primarily with
           information pertaining to the storage services within
           OpenStack. Much of the data that crosses this network has high
           integrity and confidentiality requirements and, depending on
           the type of deployment, may also have strong availability
           requirements. The trust level of this network is heavily
           dependent on other deployment decisions.</para>
        </listitem>
      </itemizedlist>
    <para>When deploying OpenStack in an enterprise as a private cloud
        it is usually behind the firewall and within the trusted
        network alongside existing systems. Users of the cloud are
        employees that are bound by the security
        requirements set forth by the company. This tends to push most
        of the security domains towards a more trusted model. However,
        when deploying OpenStack in a public facing role, no
        assumptions can be made and the attack vectors significantly
        increase.</para>
    <para>Care must be taken when managing the users of the
        system for both public and private clouds. The Identity
        service allows for LDAP to be part of the authentication
        process. Including such systems in an OpenStack deployment may
        ease user management if integrating into existing
        systems.</para>
    <para>It is important to understand that user authentication
        requests include sensitive information including user names,
        passwords, and authentication tokens. For this reason, placing
        the API services behind hardware that performs SSL termination
        is strongly recommended.</para>
    <para>
      For more information on OpenStack security, see the <link
      xlink:href="http://docs.openstack.org/security-guide/"><citetitle>OpenStack
      Security Guide</citetitle></link>.
    </para>
  </section>
 </section>
--- a/doc/arch-design/generalpurpose/section_user_requirements_general_purpose.xml
+++ b/doc/arch-design/generalpurpose/section_user_requirements_general_purpose.xml
@ -1,155 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="user-requirements-general-purpose">
    <?dbhtml stop-chunking?>
    <title>User requirements</title>
    <para>When building a general purpose cloud, you should follow the
        <glossterm baseform="IaaS">Infrastructure-as-a-Service (IaaS)</glossterm>
        model; a platform best suited for use cases with simple requirements.
        General purpose cloud user requirements are not complex.
        However, it is important to capture them even
        if the project has minimum business and technical requirements, such as a
        proof of concept (PoC), or a small lab platform.</para>
      <note>
        <para>
          The following user considerations are written from the perspective of
          the cloud builder, not from the perspective of the end user.
        </para>
      </note>
    <variablelist>
      <varlistentry>
        <term>Cost</term>
        <listitem>
            <para>Financial factors are a primary concern for
                any organization. Cost is an important criterion
                as general purpose clouds are considered the baseline
                from which all other cloud architecture environments
                derive. General purpose clouds do not always provide
                the most cost-effective environment for specialized
                applications or situations. Unless razor-thin margins and costs have
                been mandated as a critical factor, cost should not be
                the sole consideration when choosing or designing a
                general purpose architecture.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Time to market</term>
        <listitem>
            <para>The ability to deliver services or products within
                a flexible time frame is a common business factor
                when building a general purpose cloud.
                Delivering a product in six months instead
                of two years is a driving force behind the
                decision to build general purpose clouds. General
                purpose clouds allow users to self-provision and gain
                access to compute, network, and storage resources
                on demand, thus decreasing time to market.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Revenue opportunity</term>
        <listitem>
            <para>Revenue opportunities for a
                cloud will vary greatly based on the intended
                use case of that particular cloud. Some general
                purpose clouds are built for commercial customer
                facing products, but there are alternatives
                that might make the general purpose cloud the right
                choice.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <section xml:id="technical-requirements">
      <title>Technical requirements</title>
    <para>Technical cloud architecture requirements should be weighed
        against the business requirements.
    </para>
    <variablelist>
      <varlistentry>
        <term>Performance</term>
        <listitem>
            <para>As a baseline product, general purpose
                clouds do not provide optimized performance for any
                particular function. While a general purpose cloud
                should provide enough performance to satisfy average
                user considerations, performance is not a general
                purpose cloud customer driver.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>No predefined usage model</term>
        <listitem>
            <para>The lack of a pre-defined
                usage model enables the user to run a wide variety of
                applications without having to know the application
                requirements in advance. This provides a degree of
                independence and flexibility that no other cloud
                scenarios are able to provide.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>On-demand and self-service application</term>
        <listitem>
            <para>By
                definition, a cloud provides end users with the
                ability to self-provision computing power, storage,
                networks, and software in a simple and flexible way.
                The user must be able to scale their resources up to a
                substantial level without disrupting the underlying
                host operations. One of the benefits of using a
                general purpose cloud architecture is the ability to
                start with limited resources and increase them over
                time as the user demand grows.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Public cloud</term>
        <listitem>
            <para>For a company interested in building a
                commercial public cloud offering based on OpenStack,
                the general purpose architecture model might be the
                best choice. Designers are not always going to
                know the purposes or workloads for which the end users
                will use the cloud.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Internal consumption (private) cloud</term>
        <listitem>
            <para>Organizations need to determine if it is logical to
                create their own clouds internally. Using a private cloud,
                organizations are able to maintain complete control over
                architectural and cloud components.</para>
              <note>
                <para>Users will want to combine
                using the internal cloud with access to an external
                cloud. If that case is likely, it might be worth
                exploring the possibility of taking a multi-cloud
                approach with regard to at least some of the
                architectural elements.
                </para>
              </note>
            <para>Designs that incorporate the
                use of multiple clouds, such as a private cloud and a
                public cloud offering, are described in the
                "Multi-Cloud" scenario, see <xref linkend="multi_site"/>.
            </para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Security</term>
        <listitem>
            <para>Security should be implemented according
                to asset, threat, and vulnerability risk assessment
                matrices. For cloud domains that require increased
                computer security, network security, or information
                security, a general purpose cloud is not considered an
                appropriate choice.</para>
        </listitem>
      </varlistentry>
    </variablelist>
  </section>
 </section>
--- a/doc/arch-design/hybrid/section_architecture_hybrid.xml
+++ b/doc/arch-design/hybrid/section_architecture_hybrid.xml
@ -1,190 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-guide-architecture-hybrid">
    <?dbhtml stop-chunking?>
    <title>Architecture</title>
    <para>Map out the dependencies of the expected workloads
        and the cloud infrastructures required to support them to architect a
        solution for the broadest compatibility between cloud platforms,
        minimizing the need to create workarounds and processes to fill
        identified gaps.</para>
    <para>For your chosen cloud management platform, note the relative
          levels of support for both monitoring and orchestration.</para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Multi-Cloud_Priv-AWS4.png"/>
        </imageobject>
    </mediaobject>
    <section xml:id="image-portability">
      <title>Image portability</title>
    <para>The majority of cloud workloads currently run on instances
        using hypervisor technologies. The challenge is that each of these
        hypervisors uses an image format that may not be compatible with the
        others. When possible, standardize on a single hypervisor and instance
        image format. This may not be possible when using externally-managed
        public clouds.</para>
    <para>Conversion tools exist to address image format compatibility.
        Examples include <link
        xlink:href="http://libguestfs.org/virt-v2v">virt-p2v/virt-v2v</link>
        and <link
        xlink:href="http://libguestfs.org/virt-edit.1.html">
        virt-edit</link>. These tools cannot serve beyond basic cloud instance
        specifications.</para>
    <para>Alternatively, build a thin operating system image as
        the base for new instances. This facilitates rapid creation of cloud
        instances using cloud orchestration or configuration management tools
        for more specific templating. Remember if you intend to use portable
        images for disaster recovery, application diversity, or high
        availability, your users could move the images and instances between
        cloud platforms regularly.</para>
    </section>
    <section xml:id="upper-layer-services">
      <title>Upper-layer services</title>
    <para>Many clouds offer complementary services beyond the
        basic compute, network, and storage components. These
        additional services often simplify the deployment
        and management of applications on a cloud platform.</para>
    <para>When moving workloads from the source to the destination
        cloud platforms, consider that the destination cloud platform
        may not have comparable services. Implement workloads in a
        different way or by using a different technology.</para>
    <para>For example, moving an application that uses a NoSQL database
        service such as MongoDB could cause difficulties in maintaining
        the application between the platforms.</para>
    <para>There are a number of options that are appropriate for
        the hybrid cloud use case:</para>
    <itemizedlist>
        <listitem>
            <para>Implementing a baseline of upper-layer services
                across all of the cloud platforms. For
                platforms that do not support a given service, create
                a service on top of that platform and apply it to the
                workloads as they are launched on that cloud.</para>
            <para>For example, through the <glossterm>Database service</glossterm>
                for OpenStack (<glossterm>trove</glossterm>),
                OpenStack supports MySQL-as-a-Service but not NoSQL
                databases in production. To move from or run
                alongside AWS, a NoSQL workload must use an automation
                tool, such as the Orchestration service (heat), to
                recreate the NoSQL database on top of OpenStack.
            </para>
        </listitem>
        <listitem>
            <para>Deploying a <glossterm>Platform-as-a-Service (PaaS)</glossterm>
                technology that abstracts the
                upper-layer services from the underlying cloud
                platform. The unit of application deployment and
                migration is the PaaS. It leverages the services of
                the PaaS and only consumes the base infrastructure
                services of the cloud platform.</para>
        </listitem>
        <listitem>
            <para>Using automation tools to create the required upper-layer services
                that are portable across all cloud platforms.</para>
            <para>For example, instead of using database services that
                are inherent in the cloud platforms, launch cloud
                instances and deploy the databases on those
                instances using scripts or configuration and
                application deployment tools.</para>
        </listitem>
    </itemizedlist>
    </section>
    <section xml:id="network-services">
      <title>Network services</title>
    <para>Network services functionality is a critical component of
        multiple cloud architectures. It is an important factor
        to assess when choosing a CMP and cloud provider.
        Considerations include:</para>
      <itemizedlist>
        <listitem>
          <para>
            Functionality
          </para>
        </listitem>
        <listitem>
          <para>
            Security
          </para>
        </listitem>
        <listitem>
          <para>
            Scalability
          </para>
        </listitem>
        <listitem>
          <para>
            High availability (HA)
          </para>
        </listitem>
      </itemizedlist>
    <para>Verify and test critical cloud endpoint features.</para>
    <itemizedlist>
        <listitem>
            <para>After selecting the network functionality framework,
                you must confirm the functionality is compatible. This
                ensures testing and functionality persist
                during and after upgrades.</para>
              <note>
                <para>Diverse cloud platforms may de-synchronize
                   over time if you do not maintain their mutual compatibility.
                   This is a particular issue with APIs.</para>
              </note>
        </listitem>
        <listitem>
            <para>Scalability across multiple cloud providers determines
                your choice of underlying network framework. It is important to
                have the network API functions presented and to verify
                that the desired functionality persists across all
                chosen cloud endpoints.</para>
        </listitem>
        <listitem>
            <para>High availability implementations vary in
                functionality and design. Examples of some common
                methods are active-hot-standby, active-passive, and
                active-active. Develop your high availability
                implementation and a test framework to understand
                the functionality and limitations of the environment.</para>
        </listitem>
        <listitem>
            <para>It is imperative to address security considerations.
                For example, address how data is secured between client and
                endpoint, and how to secure any traffic that traverses the multiple clouds.
                Business and regulatory requirements dictate what security
                approach to take. For more information, see the
                <link linkend="security-overview">Security
                Requirements Chapter</link>.</para>
        </listitem>
    </itemizedlist>
    </section>
    <section xml:id="data">
      <title>Data</title>
    <para>Traditionally, replication has been the best method of protecting
        object store implementations. A variety of replication methods exist
        in storage architectures, for example synchronous and asynchronous
        mirroring. Most object stores and back-end storage systems implement
        methods for replication at the storage subsystem layer.
        Object stores also tailor replication techniques
        to fit a cloud's requirements.</para>
    <para>Organizations must find the right balance between
        data integrity and data availability. Replication strategy may
        also influence disaster recovery methods.</para>
    <para>Replication across different racks, data centers, and
        geographical regions increases focus on
        determining and ensuring data locality. The ability to
        guarantee data is accessed from the nearest or fastest storage
        can be necessary for applications to perform well.</para>
      <note>
        <para>When running embedded object store methods, ensure that you do
            not instigate extra data replication as this can cause performance
            issues.</para>
      </note>
    </section>
 </section>
--- a/doc/arch-design/hybrid/section_operational_considerations_hybrid.xml
+++ b/doc/arch-design/hybrid/section_operational_considerations_hybrid.xml
@ -1,86 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-guide-hybrid-operational-considerations">
    <?dbhtml stop-chunking?>
    <title>Operational considerations</title>
    <para>Hybrid cloud deployments present complex operational
        challenges. Differences between provider clouds can cause
        incompatibilities with workloads or Cloud Management
        Platforms (CMP). Cloud providers may also offer different levels of
        integration with competing cloud offerings.</para>
    <para>Monitoring is critical to maintaining a hybrid cloud, and it is
        important to determine if a CMP supports
        monitoring of all the clouds involved, or if compatible APIs
        are available to be queried for necessary information.</para>
    <section xml:id="agility">
      <title>Agility</title>
    <para>Hybrid clouds provide application
        availability across different cloud environments and
        technologies. This availability enables the deployment to
        survive disaster in any single cloud environment.
        Each cloud should provide the means to create instances quickly
        in response to capacity issues or failure elsewhere in the hybrid
        cloud.</para>
    </section>
    <section xml:id="application-readiness-hybrid">
      <title>Application readiness</title>
    <para>Enterprise workloads that depend on the
        underlying infrastructure for availability are not designed to
        run on OpenStack. If the application cannot
        tolerate infrastructure failures, it is likely to require
        significant operator intervention to recover. Applications for
        hybrid clouds must be fault tolerant, with an SLA that is not tied
        to the underlying infrastructure. Ideally, cloud applications should be
        able to recover when entire racks and data centers experience an
        outage.</para>
    </section>
    <section xml:id="upgrades">
      <title>Upgrades</title>
    <para>If a deployment includes a public cloud, predicting
        upgrades may not be possible. Carefully examine provider SLAs.</para>
      <note>
        <para>At massive scale, even when
        dealing with a cloud that offers an SLA with a high percentage
        of uptime, workloads must be able to recover quickly.</para>
      </note>
    <para>When upgrading private cloud deployments, minimize disruption by
        making incremental changes and providing a facility to either roll back
        or continue to roll forward when using a continuous delivery
        model.</para>
    <para>You may need to coordinate CMP upgrades with hybrid cloud upgrades if
        there are API changes.</para>
    </section>
    <section xml:id="network-operation-center-noc">
      <title>Network Operation Center</title>
    <para>Consider infrastructure control
        when planning the Network Operation Center (NOC)
        for a hybrid cloud environment. If a significant
        portion of the cloud is on externally managed systems,
        prepare for situations where it may not be possible to
        make changes.
        Additionally, providers may differ on how
        infrastructure must be managed and exposed. This can lead to
        delays in root cause analysis where each insists the blame
        lies with the other provider.</para>
    <para>Ensure that the network structure connects all clouds to form
        an integrated system, keeping in mind the state of handoffs.
        These handoffs must both be as reliable as possible and
        include as little latency as possible to ensure the best
        performance of the overall system.</para>
    </section>
    <section xml:id="maintainability">
      <title>Maintainability</title>
    <para>Hybrid clouds rely on third-party systems and processes. As a
        result, it is not possible to guarantee
        proper maintenance of the overall system. Instead, be prepared to
        abandon workloads and recreate them in an improved state.</para>
     </section>
 </section>
--- a/doc/arch-design/hybrid/section_prescriptive_examples_hybrid.xml
+++ b/doc/arch-design/hybrid/section_prescriptive_examples_hybrid.xml
@ -1,173 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="prescriptive-examples-multi-cloud">
    <?dbhtml stop-chunking?>
    <title>Prescriptive examples</title>
    <para>Hybrid cloud environments are designed for
        these use cases:</para>
    <itemizedlist>
        <listitem>
            <para>Bursting workloads from private to public OpenStack
                clouds</para>
        </listitem>
        <listitem>
            <para>Bursting workloads from private to public
                non-OpenStack clouds</para>
        </listitem>
        <listitem>
            <para>High availability across clouds (for technical
                diversity)</para>
        </listitem>
    </itemizedlist>
    <para>This chapter provides examples of environments
        that address each of these use cases.</para>
    <section xml:id="bursting-to-public-openstack-cloud">
    <title>Bursting to a public OpenStack cloud</title>
    <para>Company A's data center is running low on
        capacity. It is not possible to expand the data center in the
        foreseeable future. In order to accommodate
        the continuously growing need for development resources in the
        organization, Company A decides to use resources in the public
        cloud.</para>
    <para>Company A has an established data
         center with a substantial amount of hardware. Migrating the
         workloads to a public cloud is not feasible.</para>
    <para>The company has an internal cloud management platform that
        directs requests to the appropriate cloud, depending on
        the local capacity. This is a custom in-house application written for
           this specific purpose.</para>
    <para>This solution is depicted in the figure below:</para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Multi-Cloud_Priv-Pub3.png"
            />
        </imageobject>
    </mediaobject>
    <para>This example shows two clouds with a Cloud Management
        Platform (CMP) connecting them. This guide does not
        discuss a specific CMP, but describes how the Orchestration and
        Telemetry services handle, manage, and control workloads.</para>
    <para>The private OpenStack cloud has at least one
        controller and at least one compute node. It includes
        metering using the Telemetry service. The Telemetry service
        captures the load increase and the CMP processes the information.
        If there is available capacity, the CMP uses the
        OpenStack API to call the Orchestration service. This creates
        instances on the private cloud in response to user requests.
        When capacity is not available on the private cloud,
        the CMP issues a request to the Orchestration service API of
        the public cloud. This creates the instance on the public
        cloud.</para>
    <para>In this example, Company A does not direct the deployments to an
        external public cloud due to concerns regarding resource control,
        security, and increased operational expense.</para>
    </section>
    <section xml:id="bursting-to-public-nonopenstack-cloud">
      <title>Bursting to a public non-OpenStack cloud</title>
    <para>The second example examines bursting workloads from the
        private cloud into a non-OpenStack public cloud using Amazon
        Web Services (AWS) to take advantage of additional capacity
        and to scale applications.</para>
    <para>The following diagram demonstrates an OpenStack-to-AWS hybrid
        cloud:</para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Multi-Cloud_Priv-AWS4.png"
            />
        </imageobject>
    </mediaobject>
    <para>Company B states that its developers are already using AWS and
        do not want to change to a different provider.</para>
    <para>If the CMP is capable of connecting to an external
        cloud provider with an appropriate API, the workflow process
        remains the same as the previous scenario. The actions the
        CMP takes, such as monitoring loads and creating new instances,
        stay the same. However, the CMP performs actions in the
        public cloud using applicable API calls.</para>
    <para>If the public cloud is AWS, the CMP would use the
        EC2 API to create a new instance and assign an Elastic IP.
        It can then add that IP to HAProxy in the private cloud.
        The CMP can also reference AWS-specific
        tools such as CloudWatch and CloudFormation.</para>
    <para>Several open source tool kits for building CMPs are
        available and can handle this kind of translation. Examples include
        ManageIQ, jClouds, and JumpGate.</para>
    </section>
    <section xml:id="high-availability-disaster-recovery">
      <title>High availability and disaster recovery</title>
    <para>Company C requires their local data center
        to be able to recover from failure. Some of the
        workloads currently in use are running on their private
        OpenStack cloud. Protecting the data involves Block Storage,
        Object Storage, and a database. The architecture
        supports the failure of large components of the system while
        ensuring that the system continues to deliver services.
        While the services remain available to users, the failed
        components are restored in the background based on standard
        best practice data replication policies. To achieve these objectives,
        Company C replicates data to a second cloud in a geographically distant
        location. The following diagram describes this system:</para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Multi-Cloud_failover2.png"
            />
        </imageobject>
    </mediaobject>
    <para>This example includes two private OpenStack clouds connected
        with a CMP. The source cloud,
        OpenStack Cloud 1, includes a controller and at least one
        instance running MySQL. It also includes at least one Block
        Storage volume and one Object Storage volume. This means that data
        is available to the users at all times. The details of the
        method for protecting each of these sources of data
        differ.</para>
    <para>Object Storage relies on the replication capabilities of
        the Object Storage provider. Company C enables OpenStack Object Storage
        so that it creates geographically separated replicas
        that take advantage of this feature. The company configures storage
        so that at least one replica exists in each cloud. In order to make
        this work, the company configures a single array spanning both clouds
        with OpenStack Identity. Using Federated Identity, the array talks
        to both clouds, communicating with OpenStack Object Storage
        through the Swift proxy.</para>
    <para>For Block Storage, the replication is a little more
        difficult, and involves tools outside of OpenStack itself. The
        OpenStack Block Storage volume is not set as the drive itself
        but as a logical object that points to a physical back end. Disaster
        recovery is configured for Block Storage for
        synchronous backup for the highest level of data protection,
        but asynchronous backup could have been set as an alternative
        that is not as latency sensitive. For asynchronous backup, the
        Block Storage API makes it possible to export the data and also the
        metadata of a particular volume, so that it can be moved and
        replicated elsewhere. More information can be found here:
        <link
        xlink:href="https://blueprints.launchpad.net/cinder/+spec/cinder-backup-volume-metadata-support">
        https://blueprints.launchpad.net/cinder/+spec/cinder-backup-volume-metadata-support</link>.
    </para>
    <para>The synchronous backups create an identical volume in both
        clouds and choose the appropriate flavor so that each cloud
        has an identical back end. This is done by creating volumes
        through the CMP. After this is configured, a solution
        involving DRBD synchronizes the physical drives.</para>
    <para>The database component is backed up using synchronous
        backups. MySQL does not support geographically diverse
        replication, so disaster recovery is provided by replicating
        the file itself. As it is not possible to use Object Storage
        as the back end of a database like MySQL, Swift replication
        is not an option. Company C decides not to store the data on
        another geo-tiered storage system, such as Ceph, as Block
        Storage. This would have given another layer of protection.
        Another option would have been to store the database on an
        OpenStack Block Storage volume and back it up like any
        other Block Storage.</para>
    </section>
 </section>
--- a/doc/arch-design/hybrid/section_tech_considerations_hybrid.xml
+++ b/doc/arch-design/hybrid/section_tech_considerations_hybrid.xml
@ -1,196 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="technical-considerations-hybrid">
    <?dbhtml stop-chunking?>
    <title>Technical considerations</title>
    <para>A hybrid cloud environment requires inspection and
        understanding of technical issues in external data centers that may
        not be in your control. Ideally, select an architecture
        and CMP that are adaptable to changing environments.</para>
    <para>Using diverse cloud platforms increases the risk of compatibility
        issues, but clouds using the same version and distribution
        of OpenStack are less likely to experience problems.</para>
    <para>Clouds that exclusively use the same versions of OpenStack should
        have no issues, regardless of distribution. More recent distributions
        are less likely to encounter incompatibility between versions. An
        OpenStack community initiative defines core functions that need to
        remain backward compatible between supported versions. For example, the
        DefCore initiative defines basic functions that every distribution must
        support in order to use the name <productname>OpenStack</productname>.
    </para>
    <para>Vendors can add proprietary customization to their distributions. If
        an application or architecture makes use of these features, it can be
        difficult to migrate to or use other types of environments.</para>
    <para>If an environment includes non-OpenStack clouds, it may experience
        compatibility problems. CMP tools must account for the differences in
        the handling of operations and the implementation of services.</para>
    <itemizedlist>
      <title>Possible cloud incompatibilities</title>
        <listitem>
            <para>Instance deployment</para>
        </listitem>
        <listitem>
            <para>Network management</para>
        </listitem>
        <listitem>
            <para>Application management</para>
        </listitem>
        <listitem>
            <para>Services implementation</para>
        </listitem>
    </itemizedlist>
    <section xml:id="capacity-planning-hybrid">
      <title>Capacity planning</title>
    <para>One of the primary reasons many organizations use a
        hybrid cloud is to increase capacity without making large capital
        investments.</para>
    <para>Capacity and the placement of workloads are key design considerations
        for hybrid clouds. The long-term capacity plan for these
        designs must incorporate growth over time to prevent permanent
        consumption of more expensive external clouds. To avoid this scenario,
        account for future applications' capacity requirements and plan growth
        appropriately.</para>
    <para>It is difficult to predict the amount of load a particular
        application might incur if the number of users fluctuates, or the
        application experiences an unexpected increase in use. It is
        possible to define application requirements in terms of vCPU, RAM,
        bandwidth, or other resources and plan appropriately. However, other
        clouds might not use the same meter or even the same oversubscription
        rates.</para>
    <para>Oversubscription is a method to emulate more capacity than
        may physically be present. For example, a physical
        hypervisor node with 32&nbsp;GB RAM may host 24
        instances, each provisioned with 2&nbsp;GB RAM. As long
        as all 24 instances do not concurrently use 2 full
        gigabytes, this arrangement works well. However, some
        hosts take oversubscription to extremes and, as a result,
        performance can be inconsistent. If at all
        possible, determine what the oversubscription rates of each
        host are and plan capacity accordingly.</para>
    </section>
    <section xml:id="utilization-hybrid">
      <title>Utilization</title>
    <para>A CMP must be aware of what workloads are running, where they are
        running, and their preferred utilizations. For example, in
        most cases it is desirable to run as many workloads internally
        as possible, utilizing other resources only when necessary. On
        the other hand, situations exist in which the opposite is
        true, such as when an internal cloud is only for development and
        stressing it is undesirable. A cost model of various scenarios and
        consideration of internal priorities helps with this decision. To
        improve efficiency, automate these decisions when possible.</para>
    <para>The Telemetry service (ceilometer) provides information on the usage
        of various OpenStack components. Note the following:</para>
      <itemizedlist>
        <listitem>
          <para>
            If Telemetry must retain a large amount of data, for
            example when monitoring a large or active cloud, we recommend
            using a NoSQL back end such as MongoDB.</para>
        </listitem>
        <listitem>
          <para>
            You must monitor connections to non-OpenStack clouds
            and report this information to the CMP.</para>
        </listitem>
      </itemizedlist>
    </section>
    <section xml:id="performance-hybrid">
      <title>Performance</title>
    <para>Performance is critical to hybrid cloud deployments, and they are
        affected by many of the same issues as multi-site deployments,
        such as network latency between sites. Also consider the time required
        to run a workload in different clouds and methods for reducing this
        time. This may require moving data closer to applications
        or applications closer to the data they process, and
        grouping functionality so that connections that
        require low latency take place over a single cloud rather than
        spanning clouds. This may also require a CMP that can determine which
        cloud can most efficiently run which types of workloads.</para>
    <para>As with utilization, native OpenStack tools help improve performance.
        For example, you can use Telemetry to measure performance and the
        Orchestration service (heat) to react to changes in demand.</para>
      <note>
        <para>Orchestration requires special client configurations to integrate
          with Amazon Web Services. For other types of clouds, use CMP
          features.
        </para>
      </note>
    </section>
    <section xml:id="components">
      <title>Components</title>
    <para>Using more than one cloud in any design requires consideration of
        four OpenStack tools:</para>
    <variablelist>
      <varlistentry>
        <term>OpenStack Compute (nova)</term>
        <listitem>
            <para>Regardless of deployment location, hypervisor choice has a
                direct effect on how difficult it is to integrate with
                additional clouds.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Networking (neutron)</term>
        <listitem>
            <para>Whether using OpenStack Networking (neutron) or legacy
                networking (nova-network), it is necessary to understand
                network integration capabilities in order to
                connect between clouds.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Telemetry (ceilometer)</term>
        <listitem>
            <para>Use of Telemetry depends, in large part, on what other
                parts of the cloud you are using.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Orchestration (heat)</term>
        <listitem>
            <para>Orchestration can be a valuable tool in orchestrating tasks a
                CMP decides are necessary in an OpenStack-based cloud.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    </section>
    <section xml:id="special-considerations-hybrid">
      <title>Special considerations</title>
    <para>Hybrid cloud deployments require consideration of two issues that
        are not common in other situations:</para>
    <variablelist>
      <varlistentry>
        <term>Image portability</term>
        <listitem>
          <para>As of the Kilo release, there is no common image format that is
          usable by all clouds. Conversion or recreation of images is necessary
          if migrating between clouds. To simplify deployment, use the smallest
          and simplest images feasible, install only what is necessary, and
          use a deployment manager such as Chef or Puppet. Do not use golden
          images to speed up the process unless you repeatedly deploy the same
          images on the same cloud.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>API differences</term>
        <listitem>
          <para>Avoid using a hybrid cloud deployment with more than just
          OpenStack (or with different versions of OpenStack) as API changes
          can cause compatibility issues.</para>
        </listitem>
      </varlistentry>
      </variablelist>
    </section>
 </section>
--- a/doc/arch-design/hybrid/section_user_requirements_hybrid.xml
+++ b/doc/arch-design/hybrid/section_user_requirements_hybrid.xml
@ -1,258 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="user-requirements-hybrid">
    <?dbhtml stop-chunking?>
    <title>User requirements</title>
    <para>Hybrid cloud architectures are complex, especially those
       that use heterogeneous cloud platforms. Ensure that design choices
       match requirements so that the benefits outweigh the inherent additional
       complexity and risks.</para>
    <variablelist>
      <title>Business considerations when designing a hybrid
        cloud deployment</title>
      <varlistentry>
        <term>Cost</term>
        <listitem>
            <para>A hybrid cloud architecture involves multiple
                vendors and technical architectures. These
                architectures may be more expensive to deploy and
                maintain. Operational costs can be higher because of
                the need for more sophisticated orchestration and
                brokerage tools than in other architectures. In
                contrast, overall operational costs might be lower by
                virtue of using a cloud brokerage tool to deploy the
                workloads to the most cost effective platform.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Revenue opportunity</term>
        <listitem>
            <para>Revenue opportunities vary based on the intent and use case
                of the cloud. As a commercial, customer-facing product, you
                must consider whether building over multiple platforms makes
                the design more attractive to customers.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Time-to-market</term>
        <listitem>
            <para>One common reason to use cloud platforms is to improve the
                time-to-market of a new product or application. For example,
                using multiple cloud platforms is viable because there is an
                existing investment in several applications. It is faster to
                tie the investments together than to migrate the
                components and refactor them to a single platform.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Business or technical diversity</term>
        <listitem>
            <para>Organizations leveraging cloud-based services can
                embrace business diversity and utilize a hybrid cloud
                design to spread their workloads across multiple cloud
                providers. This ensures that no single cloud provider is
                the sole host for an application.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Application momentum</term>
        <listitem>
            <para>Businesses with existing applications may find that it is
                more cost effective to integrate applications on multiple
                cloud platforms than to migrate them to a single platform.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <section xml:id="workload-considerations">
      <title>Workload considerations</title>
    <para>A workload can be a single application or a suite of applications
      that work together. It can also be a duplicate set of applications that
      need to run on multiple cloud environments. In a hybrid cloud
      deployment, the same workload often needs to function
      equally well on radically different public and private cloud
      environments. The architecture needs to address these
      potential conflicts, complexity, and platform
      incompatibilities.</para>
    <variablelist>
      <title>Use cases for a hybrid cloud architecture</title>
      <varlistentry>
      <term>Dynamic resource expansion or bursting</term>
        <listitem>
            <para>An application that requires additional resources may suit
                a multiple cloud architecture.
                For example, a retailer needs additional resources
                during the holiday season, but does not want to add private
                cloud resources to meet the peak demand. The user can
                accommodate the increased load by bursting to
                a public cloud for these peak load
                periods. These bursts could be for long or short
                cycles ranging from hourly to yearly.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
      <term>Disaster recovery and business continuity</term>
        <listitem>
            <para>Cheaper storage makes the public
                cloud suitable for maintaining backup applications.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
      <term>Federated hypervisor and instance management</term>
        <listitem>
            <para>Adding self-service, charge back, and transparent delivery of
                the resources from a federated pool can be cost
                effective. In a hybrid cloud environment, this is a
                particularly important consideration. Look for a cloud
                that provides cross-platform hypervisor support and
                robust instance management tools.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
      <term>Application portfolio integration</term>
        <listitem>
            <para>An enterprise cloud delivers efficient application portfolio
                management and deployments by leveraging
                self-service features and rules according to use. Integrating
                existing cloud environments is a common driver when building
                hybrid cloud architectures.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
      <term>Migration scenarios</term>
        <listitem>
            <para>Hybrid cloud architecture enables the migration of
                applications between different clouds.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
      <term>High availability</term>
        <listitem>
            <para>A combination of locations and platforms enables a
                level of availability that is not
                possible with a single platform. This approach increases
                design complexity.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <para>As running a workload on multiple cloud platforms increases design
        complexity, we recommend first exploring options such as transferring
        workloads across clouds at the application, instance, cloud platform,
        hypervisor, and network levels.</para>
    </section>
    <section xml:id="tools-considerations-hybrid">
      <title>Tools considerations</title>
    <para>Hybrid cloud designs must incorporate tools to facilitate working
        across multiple clouds.</para>
    <variablelist>
      <title>Tool functions</title>
     <varlistentry>
      <term>Broker between clouds</term>
        <listitem>
            <para>Brokering software evaluates relative costs between different
                cloud platforms. Cloud Management Platforms (CMP)
                allow the designer to determine the right location for the
                workload based on predetermined criteria.</para>
        </listitem>
     </varlistentry>
     <varlistentry>
      <term>Facilitate orchestration across the clouds</term>
        <listitem>
            <para>CMPs simplify the migration of application workloads between
                public, private, and hybrid cloud platforms. We recommend
                using cloud orchestration tools for managing a diverse
                portfolio of systems and applications across multiple cloud
                platforms.</para>
        </listitem>
     </varlistentry>
    </variablelist>
    </section>
    <section xml:id="network-considerations-hybrid">
      <title>Network considerations</title>
    <para>It is important to consider the functionality, security, scalability,
        availability, and testability of the network when choosing a CMP and
        cloud provider.</para>
    <itemizedlist>
        <listitem>
            <para>Decide on a network framework and
                design minimum functionality tests. This ensures
                testing and functionality persists during and after
                upgrades.</para>
        </listitem>
        <listitem>
            <para>Scalability across multiple cloud providers may
                dictate which underlying network framework you
                choose in different cloud providers. It is important
                to present the network API functions and to
                verify that functionality persists across all cloud
                endpoints chosen.</para>
        </listitem>
        <listitem>
            <para>High availability implementations vary in
                functionality and design. Examples of some common
                methods are active-hot-standby, active-passive, and
                active-active. Development of high availability and test
                frameworks is necessary to ensure understanding of
                functionality and limitations.</para>
        </listitem>
        <listitem>
            <para>Consider the security of data between the client and the
                endpoint, and of traffic that traverses the multiple
                clouds.</para>
        </listitem>
    </itemizedlist>
    </section>
    <section xml:id="risk-mitigation-management-hybrid">
      <title>Risk mitigation and management considerations</title>
    <para>Hybrid cloud architectures introduce additional risk because
        they are more complex than a single cloud design and may involve
        incompatible components or tools. However, they also reduce
        risk by spreading workloads over multiple providers.</para>
    <variablelist>
      <title>Hybrid cloud risks</title>
      <varlistentry>
        <term>Provider availability or implementation details</term>
        <listitem>
            <para>
                Business changes can affect provider availability. Likewise,
                changes in a provider's service can disrupt a hybrid cloud
                environment or increase costs.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Differing SLAs</term>
        <listitem>
            <para>Hybrid cloud designs must accommodate differences in SLAs
                between providers, and consider their enforceability.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Security levels</term>
        <listitem>
            <para>Securing multiple cloud
                environments is more complex than securing single
                cloud environments. We recommend addressing concerns at
                the application, network, and cloud platform levels.
                Be aware that each cloud platform approaches security
                differently, and a hybrid cloud design must address and
                compensate for these differences.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>Provider API changes</term>
        <listitem>
            <para>Consumers of external clouds rarely have control over
                provider changes to APIs, and changes can break compatibility.
                Using only the most common and basic APIs can minimize
                potential conflicts.</para>
        </listitem>
      </varlistentry>
    </variablelist>
  </section>
 </section>
--- a/doc/arch-design/introduction/section_how_this_book_is_organized.xml
+++ b/doc/arch-design/introduction/section_how_this_book_is_organized.xml
@ -1,106 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-guide-how-this-book-is-organized">
    <title>How this book is organized</title>
    <para>This book examines some of the most common uses for OpenStack
      clouds, and explains the considerations for each use case.
      Cloud architects may use this book as a comprehensive guide by
      reading all of the use cases, but it is also possible to review
      only the chapters which pertain to a specific use case.
      The use cases covered in this guide include:</para>
    <itemizedlist>
        <listitem>
            <para>
              <link linkend="generalpurpose">General purpose</link>: Uses common components that address
              80% of common use cases.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="compute_focus">Compute focused</link>: For compute intensive workloads
              such as high performance computing (HPC).
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="storage_focus">Storage focused</link>: For storage intensive workloads such as
              data analytics with parallel file systems.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="network_focus">Network focused</link>: For high performance and reliable
              networking, such as a <glossterm
              >content delivery network (CDN)</glossterm>.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="multi_site">Multi-site</link>: For applications that require multiple site
              deployments for geographical, reliability or data
              locality reasons.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="hybrid">Hybrid cloud</link>: Uses multiple disparate clouds
              connected either for failover, hybrid cloud bursting, or
              availability.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="massively_scalable">Massively
              scalable</link>: For
              cloud service providers or other large
              installations.
            </para>
        </listitem>
        <listitem>
            <para>
              <link linkend="specialized">Specialized cases</link>: Architectures that have not
              previously been covered in the defined use cases.
            </para>
        </listitem>
    </itemizedlist>
    <!-- This section is currently commented out as it is irrelevant within the current
    context. However, there are plans to use this list in the future. Please do not remove.
    <para>Each chapter in the guide is then further broken down into
        the following sections:</para>
    <itemizedlist>
        <listitem>
            <para>Introduction: Provides an overview of the
                architectural use case.</para>
        </listitem>
        <listitem>
            <para>User requirements: Defines the set of user
                considerations that typically come into play for that
                use case.</para>
        </listitem>
        <listitem>
            <para>Technical considerations: Covers the technical
                issues that must be accounted for when dealing with this
                use case.</para>
        </listitem>
        <listitem>
            <para>Operational considerations: Covers the ongoing
                operational tasks associated with this use case and
                architecture.</para>
        </listitem>
        <listitem>
            <para>Architecture: Covers the overall architecture
                associated with the use case.</para>
        </listitem>
        <listitem>
            <para>Prescriptive examples: Presents one or more
                scenarios where this architecture could be
                deployed.</para>
        </listitem>
    </itemizedlist>
 -->
 </section>
--- a/doc/arch-design/introduction/section_how_this_book_was_written.xml
+++ b/doc/arch-design/introduction/section_how_this_book_was_written.xml
@ -1,95 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-guide-why-and-who-we-wrote-this-book">
    <title>Why and how we wrote this book</title>
    <para>We wrote this book to guide you through designing an OpenStack cloud
        architecture. This guide identifies design considerations
        for common cloud use cases and provides examples.</para>
    <para>The Architecture Design Guide was written in a book sprint format,
        which is a facilitated, rapid development production method for books.
        The Book Sprint was facilitated by Faith Bosworth and Adam
        Hyde of Book Sprints. For more information, see the Book Sprints website
        (www.booksprints.net).</para>
    <para>This book was written in five days during July 2014 while
        exhausting the M&amp;M, Mountain Dew and healthy options
        supply, complete with juggling entertainment during lunches at
        VMware's headquarters in Palo Alto.</para>
    <para>We would like to thank VMware for their generous
        hospitality, as well as our employers, Cisco, Cloudscaling,
        Comcast, EMC, Mirantis, Rackspace, Red Hat, Verizon, and
        VMware, for enabling us to contribute our time. We would
        especially like to thank Anne Gentle and Kenneth Hui for all
        of their shepherding and organization in making this
        happen.</para>
    <para>The author team includes:</para>
    <itemizedlist>
        <listitem>
            <para>Kenneth Hui (EMC)
                  <link xlink:href="http://twitter.com/hui_kenneth"
                  >@hui_kenneth</link></para>
        </listitem>
        <listitem>
            <para>Alexandra Settle (Rackspace)
                  <link xlink:href="http://twitter.com/dewsday"
                  >@dewsday</link></para>
        </listitem>
        <listitem>
            <para>Anthony Veiga (Comcast)
                  <link xlink:href="http://twitter.com/daaelar"
                  >@daaelar</link></para>
        </listitem>
        <listitem>
            <para>Beth Cohen (Verizon)
                  <link xlink:href="http://twitter.com/bfcohen"
                  >@bfcohen</link></para>
        </listitem>
        <listitem>
            <para>Kevin Jackson (Rackspace)
                  <link xlink:href="http://twitter.com/itarchitectkev"
                  >@itarchitectkev</link></para>
        </listitem>
        <listitem>
            <para>Maish Saidel-Keesing (Cisco)
                  <link xlink:href="http://twitter.com/maishsk"
                  >@maishsk</link></para>
        </listitem>
        <listitem>
            <para>Nick Chase (Mirantis)
                  <link xlink:href="http://twitter.com/NickChase"
                  >@NickChase</link></para>
        </listitem>
        <listitem>
            <para>Scott Lowe (VMware)
                  <link xlink:href="http://twitter.com/scott_lowe"
                  >@scott_lowe</link></para>
        </listitem>
        <listitem>
            <para>Sean Collins (Comcast)
                  <link xlink:href="http://twitter.com/sc68cal"
                  >@sc68cal</link></para>
        </listitem>
        <listitem>
            <para>Sean Winn (Cloudscaling)
                  <link xlink:href="http://twitter.com/seanmwinn"
                  >@seanmwinn</link></para>
        </listitem>
        <listitem>
            <para>Sebastian Gutierrez (Red Hat)
                  <link xlink:href="http://twitter.com/gutseb"
                  >@gutseb</link></para>
        </listitem>
        <listitem>
            <para>Stephen Gordon (Red Hat)
                  <link xlink:href="http://twitter.com/xsgordon"
                  >@xsgordon</link></para>
        </listitem>
        <listitem>
            <para>Vinny Valdez (Red Hat)
                  <link xlink:href="http://twitter.com/VinnyValdez"
                  >@VinnyValdez</link></para>
        </listitem>
    </itemizedlist>
 </section>
--- a/doc/arch-design/introduction/section_intended_audience.xml
+++ b/doc/arch-design/introduction/section_intended_audience.xml
@ -1,18 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-guide-intended-audience">
  <title>Intended audience</title>
    <para>This book has been written for architects and designers of
        OpenStack clouds. For a guide on deploying and operating
        OpenStack, please refer to the <citetitle>OpenStack Operations
        Guide</citetitle> (<link
        xlink:href="http://docs.openstack.org/openstack-ops">http://docs.openstack.org/openstack-ops</link>).
    </para>
    <para>Before reading this book, we recommend prior knowledge of cloud architecture
        and principles, experience in enterprise system design, Linux
        and virtualization experience, and a basic understanding of
        networking principles and protocols.</para>
 </section>
--- a/doc/arch-design/introduction/section_methodology.xml
+++ b/doc/arch-design/introduction/section_methodology.xml
@ -1,204 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="methodology">
    <title>Methodology</title>
    <para>The best way to design your cloud architecture is through creating and
      testing use cases. Planning for applications that support thousands of
      sessions per second, variable workloads, and complex, changing data,
      requires you to identify the key meters. Identifying these key meters,
      such as number of concurrent transactions per second, and size of
      database, makes it possible to build a method for testing your assumptions.</para>
    <para>Use a functional user scenario to develop test cases, and to measure
      overall project trajectory.</para>
      <note>
        <para>If you do not want to use an application to develop user
          requirements automatically, you need to create requirements to build
          test harnesses and develop usable meters.</para>
      </note>
    <para>Establishing these meters allows you to respond to changes quickly without
       having to set exact requirements in advance.
       This creates ways to configure the system, rather than redesigning
       it every time there is a requirements change.</para>
      <important>
        <para>It is important to limit scope creep. Ensure you address tool limitations,
          but do not recreate the entire suite of tools. Work
          with technical product owners to establish critical features that are needed
          for a successful cloud deployment.</para>
      </important>
    <section xml:id="application-cloud-readiness-methods">
        <title>Application cloud readiness</title>
        <para>The cloud does more than host virtual machines and their applications.
            This <emphasis>lift and shift</emphasis>
            approach works in certain situations, but there is a fundamental
            difference between clouds and traditional bare-metal-based
            environments, or even traditional virtualized environments.</para>
        <para>In traditional environments, with traditional enterprise
            applications, the applications and the servers that they run on are
            <emphasis>pets</emphasis>.
            They are lovingly crafted and cared for, the servers have
            names like Gandalf or Tardis, and if they get sick someone nurses
            them back to health. All of this is designed so that the application
            does not experience an outage.</para>
        <para>In cloud environments, servers are more like
            cattle. There are thousands of them, they get names like NY-1138-Q,
            and if they get sick, they get put down and a sysadmin installs
            another one. Traditional applications that are unprepared for this
            kind of environment may suffer outages, loss of data, or
            complete failure.</para>
        <para>There are other reasons to design applications with the cloud in mind.
            Some are defensive, such as the fact that because applications cannot be
            certain of exactly where or on what hardware they will be launched,
            they need to be flexible, or at least adaptable. Others are
            proactive. For example, one of the advantages of using the cloud is
            scalability. Applications need to be designed in such a way that
            they can take advantage of these and other opportunities.</para>
    </section>
    <section xml:id="determining-whether-an-application-is-cloud-ready">
        <title>Determining whether an application is cloud-ready</title>
        <para>There are several factors to take into consideration when looking
            at whether an application is a good fit for the cloud.</para>
            <variablelist>
              <varlistentry>
                <term>Structure</term>
                <listitem>
                  <para>
                    A large, monolithic, single-tiered, legacy
                    application typically is not a good fit for the
                    cloud. Efficiencies are gained when load can be
                    spread over several instances, so that a failure
                    in one part of the system can be mitigated without
                    affecting other parts of the system, or so that
                    scaling can take place where the app needs
                    it.
                  </para>
                </listitem>
              </varlistentry>
              <varlistentry>
                <term>Dependencies</term>
                <listitem>
                  <para>
                    Applications that depend on specific
                    hardware, such as a particular chip set or an
                    external device such as a fingerprint
                    reader, might not be a good fit for the
                    cloud, unless those dependencies are specifically
                    addressed. Similarly, if an application depends on
                    an operating system or set of libraries that
                    cannot be used in the cloud, or cannot be
                    virtualized, that is a problem.
                  </para>
                </listitem>
              </varlistentry>
              <varlistentry>
                <term>Connectivity</term>
                <listitem>
                  <para>
                    Self-contained applications, or those that depend
                    on resources that are not reachable by the cloud
                    in question, will not run. In some situations,
                    you can work around these issues with custom network
                    setup, but how well this works depends on the
                    chosen cloud environment.
                  </para>
                </listitem>
              </varlistentry>
              <varlistentry>
                <term>Durability and resilience</term>
                <listitem>
                  <para>
                    Despite the existence of SLAs, things break:
                    servers go down, network connections are
                    disrupted, or too many tenants on a server make a
                    server unusable. An application must be sturdy
                    enough to contend with these issues.
                  </para>
                </listitem>
              </varlistentry>
            </variablelist>
    </section>
    <section xml:id="designing-for-the-cloud">
        <title>Designing for the cloud</title>
        <para>Here are some guidelines to keep in mind when designing an
            application for the cloud:</para>
        <itemizedlist>
            <listitem>
                <para>Be a pessimist: Assume everything fails and design
                    backwards.</para>
            </listitem>
            <listitem>
                <para>Put your eggs in multiple baskets: Leverage multiple
                    providers, geographic regions and availability zones to
                    accommodate for local availability issues. Design for
                    portability.</para>
            </listitem>
            <listitem>
                <para>Think efficiency: Inefficient designs will not scale.
                    Efficient designs become cheaper as they scale. Kill off
                    unneeded components or capacity.</para>
            </listitem>
            <listitem>
                <para>Be paranoid: Design for defense in depth and zero
                    tolerance by building in security at every level and between
                    every component. Trust no one.</para>
            </listitem>
            <listitem>
                <para>But not too paranoid: Not every application needs the
                    platinum solution. Architect for different SLAs, service
                    tiers, and security levels.</para>
            </listitem>
            <listitem>
                <para>Manage the data: Data is usually the most inflexible and
                    complex area of a cloud and cloud integration architecture.
                    Do not short change the effort in analyzing and addressing
                    data needs.</para>
            </listitem>
            <listitem>
                <para>Hands off: Leverage automation to increase consistency and
                    quality and reduce response times.</para>
            </listitem>
            <listitem>
                <para>Divide and conquer: Pursue partitioning and
                    parallel layering wherever possible. Make components as small
                    and portable as possible. Use load balancing between layers.
                </para>
            </listitem>
            <listitem>
                <para>Think elasticity: Increasing resources should result in a
                    proportional increase in performance and scalability.
                    Decreasing resources should have the opposite effect.
                </para>
            </listitem>
            <listitem>
                <para>Be dynamic: Enable dynamic configuration changes such as
                    auto scaling, failure recovery and resource discovery to
                    adapt to changing environments, faults, and workload volumes.
                </para>
            </listitem>
            <listitem>
                <para>Stay close: Reduce latency by moving highly interactive
                    components and data near each other.</para>
            </listitem>
            <listitem>
                <para>Keep it loose: Loose coupling, service interfaces,
                    separation of concerns, abstraction, and well-defined APIs
                    deliver flexibility.</para>
            </listitem>
            <listitem>
                <para>Be cost aware: Autoscaling, data transmission, virtual
                    software licenses, reserved instances, and similar costs can rapidly
                    increase monthly usage charges. Monitor usage closely.
                </para>
            </listitem>
        </itemizedlist>
    </section>
 </section>
--- a/doc/arch-design/locale/arch-design.pot
+++ b/doc/arch-design/locale/arch-design.pot
--- a/doc/arch-design/locale/zh_CN.po
+++ b/doc/arch-design/locale/zh_CN.po
--- a/doc/arch-design/massively_scalable/section_operational_considerations_massively_scalable.xml
+++ b/doc/arch-design/massively_scalable/section_operational_considerations_massively_scalable.xml
@ -1,102 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="operational-considerations-massive-scale">
    <?dbhtml stop-chunking?>
    <title>Operational considerations</title>
    <para>In order to run efficiently at massive scale, automate
        as many of the operational processes as
        possible. Automation includes the configuration of
        provisioning, monitoring and alerting systems. Part of the
        automation process includes the capability to determine when
        human intervention is required and who should act. The
        objective is to increase the ratio of running systems to
        operational staff as much as possible in order to reduce maintenance
        costs. In a massively scaled environment, it is very difficult
        for staff to give each system individual care.</para>
    <para>Configuration management tools such as Puppet and Chef enable
        operations staff to categorize systems into groups based on
        their roles and thus create configurations and system states
        that the provisioning system enforces. Systems
        that fall out of the defined state due to errors or failures
        are quickly removed from the pool of active nodes and
        replaced.</para>
    <para>At large scale the resource cost of diagnosing failed individual
        systems is far greater than the cost of
        replacement. It is more economical to replace the failed
        system with a new system, provisioning and configuring it
        automatically and adding it to the pool of active nodes.
        By automating tasks that are labor-intensive,
        repetitive, and critical to operations, cloud operations
        teams can work more
        efficiently because fewer resources are required for these
        common tasks. Administrators are then free to tackle
        tasks that are not easy to automate and that have longer-term
        impacts on the business, for example, capacity planning.</para>
    <section xml:id="the-bleeding-edge">
      <title>The bleeding edge</title>
    <para>Running OpenStack at massive scale requires striking a
        balance between stability and features. For example, it might
        be tempting to run an older stable release branch of OpenStack
        to make deployments easier. However, when running at massive
        scale, known issues that may be of some concern or only have
        minimal impact in smaller deployments could become pain points.
        Recent releases may address well known issues. The OpenStack
        community can help resolve reported issues by applying
        the collective expertise of the OpenStack developers.</para>
    <para>The number of organizations running at
        massive scales is a small proportion of the
        OpenStack community; therefore, it is important to share
        related issues with the community and be a vocal advocate for
        resolving them. Some issues only manifest when operating at
        large scale, and the number of organizations able to duplicate
        and validate an issue is small, so it is important to
        document and dedicate resources to their resolution.</para>
    <para>In some cases, the resolution to the problem is ultimately
        to deploy a more recent version of OpenStack. Alternatively,
        when you must resolve an issue in a production
        environment where rebuilding the entire environment is not an
        option, it is sometimes possible to deploy updates to specific
        underlying components in order to resolve issues or gain
        significant performance improvements. Although this may appear
        to expose the deployment to
        increased risk and instability, in many cases it
        could be an undiscovered issue.</para>
    <para>We recommend building a development and operations
        organization that is responsible for creating desired
        features, diagnosing and resolving issues, and building the
        infrastructure for large scale continuous integration tests
        and continuous deployment. This helps catch bugs early and
        makes deployments faster and easier. In addition to
        development resources, we also recommend the recruitment
        of experts in the fields of message queues, databases, distributed
        systems, networking, cloud, and storage.</para></section>
    <section xml:id="growth-and-capacity-planning">
      <title>Growth and capacity planning</title>
    <para>An important consideration in running at massive scale is
        projecting growth and utilization trends in order to plan capital
        expenditures for the short and long term. Gather utilization
        meters for compute, network, and storage, along with historical
        records of these meters. While securing major
        anchor tenants can lead to rapid jumps in the utilization
        rates of all resources, the steady adoption of the cloud
        inside an organization or by consumers in a public
        offering also creates a steady trend of increased
        utilization.</para></section>
    <section xml:id="skills-and-training">
      <title>Skills and training</title>
    <para>Projecting growth for storage, networking, and compute is
        only one aspect of a growth plan for running OpenStack at
        massive scale. Growing and nurturing development and
        operational staff is an additional consideration. Sending team
        members to OpenStack conferences and meetup events, and
        encouraging active participation in the mailing lists and
        committees, are very important ways to maintain skills and
        forge relationships in the community. For a list of OpenStack
        training providers in the marketplace, see: <link
        xlink:href="http://www.openstack.org/marketplace/training/">http://www.openstack.org/marketplace/training/</link>.
    </para>
    </section>
 </section>
--- a/doc/arch-design/massively_scalable/section_tech_considerations_massively_scalable.xml
+++ b/doc/arch-design/massively_scalable/section_tech_considerations_massively_scalable.xml
@ -1,131 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="technical-considerations-massive-scale">
    <?dbhtml stop-chunking?>
    <title>Technical considerations</title>
    <para>Repurposing an existing OpenStack environment to be
        massively scalable is a formidable task. When building
        a massively scalable environment from the ground up, ensure
        you build the initial deployment with the same principles
        and choices that apply as the environment grows. For example,
        a good approach is to deploy the first site as a multi-site
        environment. This enables you to use the same deployment
        and segregation methods as the environment grows to separate
        locations across dedicated links or wide area networks. In
        a hyperscale cloud, scale trumps redundancy. Modify applications
        with this in mind, relying on the scale and homogeneity of the
        environment to provide reliability rather than redundant
        infrastructure provided by non-commodity hardware
        solutions.</para>
    <section xml:id="infrastructure-segregation-massive-scale">
      <title>Infrastructure segregation</title>
    <para>OpenStack services support massive horizontal scale.
        Be aware that this is not the case for the entire supporting
        infrastructure. This is particularly a problem for the database
        management systems and message queues that OpenStack services
        use for data storage and remote procedure call communications.</para>
    <para>Traditional clustering techniques typically
        provide high availability and some additional scale for these
        environments. In the quest for massive scale, however, you must
        take additional steps to relieve the performance
        pressure on these components in order to prevent them from negatively
        impacting the overall performance of the environment. Ensure that
        all the components are in balance so that if the massively
        scalable environment fails, all the components are near maximum
        capacity and a single component is not causing the failure.</para>
    <para>Regions segregate completely independent
        installations linked only by an Identity and Dashboard
        (optional) installation. Services have separate
        API endpoints for each region, and include separate database
        and queue installations. This exposes some awareness of the
        environment's fault domains to users and gives them the
        ability to ensure some degree of application resiliency while
        also imposing the requirement to specify which region to apply
        their actions to.</para>
    <para>Environments operating at massive scale typically need their
        regions or sites subdivided further without exposing the
        requirement to specify the failure domain to the user. This
        provides the ability to further divide the installation into
        failure domains while also providing a logical unit for
        maintenance and the addition of new hardware. At hyperscale,
        instead of adding single compute nodes, administrators can add
        entire racks or even groups of racks at a time with each new
        addition of nodes exposed via one of the segregation concepts
        mentioned herein.</para>
    <para><glossterm baseform="cell">Cells</glossterm> provide the ability
        to subdivide the compute portion
        of an OpenStack installation, including regions, while still
        exposing a single endpoint. Each region has an API cell
        along with a number of compute cells where the
        workloads actually run. Each cell has its own database and
        message queue setup (ideally clustered), providing the ability
        to subdivide the load on these subsystems, improving overall
        performance.</para>
    <para>Each compute cell provides a complete compute installation,
        complete with full database and queue installations,
        scheduler, conductor, and multiple compute hosts. The cells
        scheduler handles placement of user requests from the single
        API endpoint to a specific cell from those available. The
        normal filter scheduler then handles placement within the
        cell.</para>
    <para>Unfortunately, Compute is the only OpenStack service that
        provides good support for cells. In addition, cells
        do not adequately support some standard
        OpenStack functionality such as security groups and host
        aggregates. Due to their relative newness and specialized use,
        cells receive relatively little testing in the OpenStack gate.
        Despite these issues, cells play an important role in
        well known OpenStack installations operating at massive scale,
        such as those at CERN and Rackspace.</para></section>
    <section xml:id="host-aggregates">
      <title>Host aggregates</title>
    <para>Host aggregates enable partitioning of OpenStack Compute
        deployments into logical groups for load balancing and
        instance distribution. You can also use host aggregates to
        further partition an availability zone. Consider a cloud which
        might use host aggregates to partition an availability zone
        into groups of hosts that either share common resources, such
        as storage and network, or have a special property, such as
        trusted computing hardware. You cannot target host aggregates
        explicitly. Instead, select instance flavors that map to host
        aggregate metadata. These flavors target host aggregates
        implicitly.</para></section>
    <section xml:id="availability-zones">
      <title>Availability zones</title>
    <para>Availability zones provide another mechanism for subdividing
        an installation or region. They are, in effect, host
        aggregates exposed for (optional) explicit targeting
        by users.</para>
    <para>Unlike cells, availability zones do not have their own database
        server or queue broker but represent an arbitrary grouping of
        compute nodes. Typically, nodes are grouped into availability
        zones using a shared failure domain based on a physical
        characteristic such as a shared power source or physical network
        connections. Users can target exposed availability zones; however,
        this is not a requirement. An alternative approach is to configure a
        default availability zone so that instances are scheduled to a zone
        other than the default availability zone of <literal>nova</literal>.</para></section>
    <section xml:id="segregation-example">
      <title>Segregation example</title>
    <para>In this example the cloud is divided into two regions, one
        for each site, with two availability zones in each based on
        the power layout of the data centers. A number of host
        aggregates enable the use of flavors to target
        virtual machine instances that require special
        capabilities shared by the target hosts, such as SSDs, 10&nbsp;GbE
        networks, or GPU cards.</para>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Massively_Scalable_Cells_+_regions_+_azs.png"
            />
        </imageobject>
    </mediaobject></section>
 </section>
--- a/doc/arch-design/massively_scalable/section_user_requirements_massively_scalable.xml
+++ b/doc/arch-design/massively_scalable/section_user_requirements_massively_scalable.xml
@ -1,135 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="user-requirements-massive-scale-overview">
    <?dbhtml stop-chunking?>
    <title>User requirements</title>
    <para>Defining user requirements for a massively scalable OpenStack
        design architecture dictates approaching the design from two
        different, yet sometimes opposing, perspectives: the cloud
        user, and the cloud operator. The expectations and perceptions
        of the consumption and management of resources of a massively
        scalable OpenStack cloud from these two perspectives are
        distinctly different.</para>
    <para>Massively scalable OpenStack clouds have the following user
        requirements:</para>
    <itemizedlist>
        <listitem>
            <para>The cloud user expects repeatable, dependable, and
                deterministic processes for launching and deploying
                cloud resources. You could deliver this through a
                web-based interface or publicly available API
                endpoints. All appropriate options for requesting
                cloud resources must be available through some type
                of user interface, a command-line interface (CLI), or
                API endpoints.</para>
        </listitem>
        <listitem>
            <para>Cloud users expect a fully self-service and
                on-demand consumption model. When an OpenStack cloud
                reaches the "massively scalable" size, expect
                consumption "as a service" in each and
                every way.</para>
        </listitem>
        <listitem>
            <para>For a user of a massively scalable OpenStack public
                cloud, there are no expectations for control over
                security, performance, or availability. Users expect
                only SLAs related to uptime of API services, and
                very basic SLAs for services offered. It is the user's
                responsibility to address these issues on their own.
                The exception to this expectation is the rare case of
                a massively scalable cloud infrastructure built for
                a private or government organization that has
                specific requirements.</para>
        </listitem>
    </itemizedlist>
    <para>The cloud user's requirements and expectations that determine
        the cloud design focus on the consumption model. The user
        expects to consume cloud resources in an automated and
        deterministic way, without any need for knowledge of the
        capacity, scalability, or other attributes of the cloud's
        underlying infrastructure.</para>
    <section xml:id="operator-requirements-massive-scale">
      <title>Operator requirements</title>
    <para>While the cloud user can be completely unaware of the
        underlying infrastructure of the cloud and its attributes, the
        operator must build and support the infrastructure for operating
        at scale. This presents a very demanding set of requirements
        for building such a cloud from the operator's perspective:</para>
    <itemizedlist>
        <listitem>
            <para>Everything must be capable of automation. This includes
                everything from compute hardware, storage hardware, and
                networking hardware to the installation and
                configuration of the supporting software. Manual
                processes are impractical in a massively scalable
                OpenStack design architecture.</para>
        </listitem>
        <listitem>
            <para>The cloud operator requires that capital expenditure
                (CapEx) is minimized at all layers of the stack.
                Operators of massively scalable OpenStack clouds
                require the use of dependable commodity hardware and
                freely available open source software components to
                reduce deployment costs and operational expenses.
                Initiatives like OpenCompute (more information
                available at <link
                xlink:href="http://www.opencompute.org">http://www.opencompute.org</link>)
                provide additional information and pointers. To cut
                costs, many operators sacrifice redundancy, for
                example, by forgoing redundant power supplies, network
                connections, and rack switches.</para>
        </listitem>
        <listitem>
            <para>Companies operating a massively scalable OpenStack
                cloud also require that operational expenditures
                (OpEx) be minimized as much as possible. We
                recommend using cloud-optimized hardware when
                managing operational overhead. Some of
                the factors to consider include power,
                cooling, and the physical design of the chassis. Through
                customization, it is possible to optimize the hardware
                and systems for this type of workload because of the
                scale of these implementations.</para>
        </listitem>
        <listitem>
            <para>Massively scalable OpenStack clouds require
                extensive metering and monitoring functionality to
                maximize the operational efficiency by keeping the
                operator informed about the status and state of the
                infrastructure. This includes full scale metering of
                the hardware and software status. A corresponding
                framework of logging and alerting is also required to
                store and enable operations to act on the meters
                provided by the metering and monitoring solutions.
                The cloud operator also needs a solution that uses the
                data provided by the metering and monitoring solution
                to provide capacity planning and capacity trending
                analysis.</para>
        </listitem>
        <listitem>
            <para>Invariably, massively scalable OpenStack clouds extend
                over several sites. Therefore, the user-operator
                requirements for a multi-site OpenStack architecture
                design are also applicable here. This includes various
                legal requirements; other jurisdictional legal or
                compliance requirements; image
                consistency-availability; storage replication and
                availability (both block and file/object storage); and
                authentication, authorization, and auditing (AAA).
                See <xref linkend="multi_site"/>
                for more details on requirements and considerations
                for multi-site OpenStack clouds.</para>
        </listitem>
        <listitem>
            <para>The design architecture of a massively scalable OpenStack
                cloud must address considerations around physical
                facilities such as space, floor weight, rack height and
                type, environmental considerations, power usage and power
                usage efficiency (PUE), and physical security.</para>
        </listitem>
    </itemizedlist></section>
 </section>
--- a/doc/arch-design/multi_site/section_architecture_multi_site.xml
+++ b/doc/arch-design/multi_site/section_architecture_multi_site.xml
@ -1,123 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="arch-design-architecture-multiple-site">
    <?dbhtml stop-chunking?>
    <title>Architecture</title>
    <para><xref linkend="multi-site_arch"/>
      illustrates a high level multi-site OpenStack
      architecture. Each site is an OpenStack cloud but it may be necessary
      to architect the sites on different versions. For example, if the
      second site is intended to be a replacement for the first site,
      they would run different versions. Another common design would be a private
      OpenStack cloud with a replicated site that would be used for high
      availability or disaster recovery. The most important design decision
      is configuring storage as a single shared pool or separate pools,
      depending on user and technical requirements.</para>
   <figure xml:id="multi-site_arch">
     <title>Multi-site OpenStack architecture</title>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="6in"
                fileref="../figures/Multi-Site_shared_keystone_horizon_swift1.png"/>
        </imageobject>
    </mediaobject>
  </figure>
    <section xml:id="openstack-services-architecture">
        <title>OpenStack services architecture</title>
        <para>The Identity service, which is used by all other
            OpenStack components for authorization and the catalog of
            service endpoints, supports the concept of regions. A region
            is a logical construct used to group OpenStack services in
            close proximity to one another. The concept of
            regions is flexible; it may contain OpenStack service
            endpoints located within a distinct geographic region or regions.
            It may be smaller in scope, where a region is a single rack
            within a data center, with multiple regions existing in adjacent
            racks in the same data center.</para>
        <para>The majority of OpenStack components are designed to run
          within the context of a single region. The Compute
          service is designed to manage compute resources within a region,
          with support for subdivisions of compute resources by using
          availability zones and cells. The Networking service
          can be used to manage network resources in the same broadcast
          domain or collection of switches that are linked. The OpenStack
          Block Storage service controls storage resources within a region
          with all storage resources residing on the same storage network.
          Like the OpenStack Compute service, the OpenStack Block Storage
          service also supports the availability zone construct which can
          be used to subdivide storage resources.</para>
        <para>The OpenStack dashboard, OpenStack Identity, and OpenStack
            Object Storage services are components that can each be deployed
            centrally in order to serve multiple regions.</para>
    </section>
    <section xml:id="arch-multi-storage">
        <title>Storage</title>
        <para>With multiple OpenStack regions, it is recommended to configure
          a single OpenStack Object Storage service endpoint to deliver
          shared file storage for all regions. The Object Storage service
          internally replicates files to multiple nodes which can be used
          by applications or workloads in multiple regions. This simplifies
          high availability failover and disaster recovery rollback.</para>
        <para>In order to scale the Object Storage service to meet the workload
            of multiple regions, multiple proxy workers are run and
            load-balanced, storage nodes are installed in each region, and the
            entire Object Storage service can be fronted by an HTTP caching
            layer. This is done so client requests for objects can be served out
            of caches rather than directly from the storage modules themselves,
            reducing the actual load on the storage network. In addition to an
            HTTP caching layer, use a caching layer like Memcache to cache
            objects between the proxy and storage nodes.</para>
        <para>If the cloud is designed with a separate Object Storage
            service endpoint made available in each region, applications are
            required to handle synchronization (if desired) and other management
            operations to ensure consistency across the nodes. For some
            applications, having multiple Object Storage service endpoints
            located in the same region as the application may be desirable due
            to reduced latency, cross region bandwidth, and ease of
            deployment.</para>
          <note>
            <para>For the Block Storage service, the most important decisions
              are the selection of the storage technology, and whether
              a dedicated network is used to carry storage traffic
              from the storage service to the compute nodes.</para>
          </note>
    </section>
    <section xml:id="arch-networking-multiple">
        <title>Networking</title>
        <para>When connecting multiple regions together, there are several design
            considerations. The overlay network technology choice determines how
            packets are transmitted between regions and how the logical network
            and addresses present to the application. If there are security or
            regulatory requirements, encryption should be implemented to secure
            the traffic between regions. For networking inside a region, the
            overlay network technology for tenant networks is equally important.
            The overlay technology and the network traffic that an application
            generates or receives can be either complementary or serve cross
            purposes. For example, using an overlay technology for an application
            that transmits a large number of small packets could add excessive
            latency or overhead to each packet if not configured
            properly.</para>
    </section>
    <section xml:id="arch-dependencies-multiple">
        <title>Dependencies</title>
        <para>The architecture for a multi-site OpenStack installation
          is dependent on a number of factors. One major dependency to
          consider is storage. When designing the storage system, the
          storage mechanism needs to be determined. Once the storage
          type is determined, how it is accessed is critical. For example,
          we recommend that storage should use a dedicated network.
          Another concern is how the storage is configured to protect
          the data. For example, consider the Recovery Point Objective (RPO)
          and the Recovery Time Objective (RTO). How quickly recovery from
          a fault can be completed determines how often the replication of
          data is required. Ensure that enough storage is allocated to
          support the data protection strategy.
      </para>
        <para>Networking decisions include the encapsulation mechanism that can
            be used for the tenant networks, how large the broadcast domains
            should be, and the contracted SLAs for the interconnects.</para>
    </section>
 </section>
--- a/doc/arch-design/multi_site/section_operational_considerations_multi_site.xml
+++ b/doc/arch-design/multi_site/section_operational_considerations_multi_site.xml
@ -1,180 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="operational-considerations-multi-site">
    <?dbhtml stop-chunking?>
    <title>Operational considerations</title>
    <para>Multi-site OpenStack cloud deployment using regions
        requires that the service catalog contains per-region entries
        for each service deployed other than the Identity service. Most
        off-the-shelf OpenStack deployment tools have limited support
        for defining multiple regions in this fashion.</para>
    <para>Deployers should be aware of this and provide the appropriate
        customization of the service catalog for their site either
        manually, or by customizing deployment tools in use.</para>
    <note><para>As of the Kilo release, documentation for
        implementing this feature is in progress. See this bug for
        more information:
        <link
        xlink:href="https://bugs.launchpad.net/openstack-manuals/+bug/1340509">https://bugs.launchpad.net/openstack-manuals/+bug/1340509</link>.
    </para></note>
    <section xml:id="licensing">
      <title>Licensing</title>
    <para>Multi-site OpenStack deployments present additional
        licensing considerations over and above regular OpenStack
        clouds, particularly where site licenses are in use to provide
        cost efficient access to software licenses. The licensing for
        host operating systems, guest operating systems, OpenStack
        distributions (if applicable), software-defined infrastructure
        including network controllers and storage systems, and even
        individual applications need to be evaluated.</para>
    <para>Topics to consider include:</para>
    <itemizedlist>
        <listitem>
            <para>The definition of what constitutes a site
                in the relevant licenses, as the term does not
                necessarily denote a geographic or otherwise
                physically isolated location.</para>
        </listitem>
        <listitem>
            <para>Differentiations between "hot" (active) and "cold"
                (inactive) sites, where significant savings may be made
                in situations where one site is a cold standby for
                disaster recovery purposes only.</para>
        </listitem>
        <listitem>
            <para>Certain locations might require local vendors to
                provide support and services for each site which may vary
                with the licensing agreement in place.</para>
        </listitem>
    </itemizedlist></section>
    <section xml:id="logging-and-monitoring-multi-site">
      <title>Logging and monitoring</title>
    <para>Logging and monitoring do not significantly differ for a
        multi-site OpenStack cloud. The tools described in the <link
        xlink:href="http://docs.openstack.org/openstack-ops/content/logging_monitoring.html">Logging
        and monitoring chapter</link> of the <citetitle>Operations
        Guide</citetitle> remain applicable. Logging and monitoring
        can be provided on a per-site basis, and in a common
        centralized location.</para>
    <para>When attempting to deploy logging and monitoring facilities
        to a centralized location, care must be taken with the load
        placed on the inter-site networking links.</para></section>
    <section xml:id="upgrades-multi-site">
      <title>Upgrades</title>
    <para>In multi-site OpenStack clouds deployed using regions, sites
        are independent OpenStack installations which are linked
        together using shared centralized services such as OpenStack
        Identity. At a high level the recommended order of operations
        to upgrade an individual OpenStack environment is (see the <link
        xlink:href="http://docs.openstack.org/openstack-ops/content/ops_upgrades-general-steps.html">Upgrades
        chapter</link> of the <citetitle>Operations Guide</citetitle>
        for details):</para>
    <orderedlist>
        <listitem>
            <para>Upgrade the OpenStack Identity service
                (keystone).</para>
        </listitem>
        <listitem>
            <para>Upgrade the OpenStack Image service (glance).</para>
        </listitem>
        <listitem>
            <para>Upgrade OpenStack Compute (nova), including
                networking components.</para>
        </listitem>
        <listitem>
            <para>Upgrade OpenStack Block Storage (cinder).</para>
        </listitem>
        <listitem>
            <para>Upgrade the OpenStack dashboard (horizon).</para>
        </listitem>
    </orderedlist>
    <para>The process for upgrading a multi-site environment is not
        significantly different:</para>
    <orderedlist>
        <listitem>
            <para>Upgrade the shared OpenStack Identity service
                (keystone) deployment.</para>
        </listitem>
        <listitem>
            <para>Upgrade the OpenStack Image service (glance) at each
                site.</para>
        </listitem>
        <listitem>
            <para>Upgrade OpenStack Compute (nova), including
                networking components, at each site.</para>
        </listitem>
        <listitem>
            <para>Upgrade OpenStack Block Storage (cinder) at each
                site.</para>
        </listitem>
        <listitem>
            <para>Upgrade the OpenStack dashboard (horizon), at each
                site or in the single central location if it is
                shared.</para>
        </listitem>
    </orderedlist>
    <para>Compute upgrades within each site can also be performed in a rolling
        fashion. Compute controller services (API, Scheduler, and
        Conductor) can be upgraded prior to upgrading of individual
        compute nodes. This allows operations staff to keep a site
        operational for users of Compute services while performing an
        upgrade.</para></section>
    <section xml:id="quota-management-multi-site">
      <title>Quota management</title>
      <para>Quotas are used to set operational limits to prevent system
        capacities from being exhausted without notification. They are
        currently enforced at the tenant (or project) level rather than
        at the user level.</para>
      <para>Quotas are defined on a per-region basis. Operators can
        define identical quotas for tenants in each region of the
        cloud to provide a consistent experience, or even create a
        process for synchronizing allocated quotas across regions. It
        is important to note that only the operational limits imposed
        by the quotas will be aligned; consumption of quotas by users
        will not be reflected between regions.</para>
    <para>For example, given a cloud with two regions, if the operator
        grants a user a quota of 25 instances in each region then that
        user may launch a total of 50 instances spread across both
        regions. They may not, however, launch more than 25 instances
        in any single region.</para>
    <para>For more information on managing quotas refer to the
        <link
        xlink:href="http://docs.openstack.org/openstack-ops/content/projects_users.html">Managing
        projects and users chapter</link> of the <citetitle>OpenStack
        Operations Guide</citetitle>.</para>
    </section>
    <section xml:id="policy-management-multi-site">
      <title>Policy management</title>
    <para>OpenStack provides a default set of Role Based Access
        Control (RBAC) policies, defined in a <filename>policy.json</filename> file, for
        each service. Operators edit these files to customize the
        policies for their OpenStack installation. If the application
        of consistent RBAC policies across sites is a requirement, then
        it is necessary to ensure proper synchronization of the
        <filename>policy.json</filename> files to all installations.</para>
    <para>This must be done using system administration tools
        such as rsync as functionality for synchronizing policies
        across regions is not currently provided within OpenStack.</para></section>
    <section xml:id="documentation-multi-site">
      <title>Documentation</title>
    <para>Users must be able to leverage cloud infrastructure and
        provision new resources in the environment. It is important
        that user documentation is accessible by users to ensure they
        are given sufficient information to help them leverage the cloud.
        As an example, by default OpenStack schedules instances on a compute node
        automatically. However, when multiple regions are available,
        the end user needs to decide in which region to schedule the
        new instance. The dashboard presents the user with
        the first region in your configuration. The API and CLI tools
        do not execute commands unless a valid region is specified.
        It is therefore important to provide documentation to your
        users describing the region layout as well as calling out that
        quotas are region-specific. If a user reaches his or her quota
        in one region, OpenStack does not automatically build new
        instances in another. Documenting specific examples helps
        users understand how to operate the cloud, thereby reducing
        calls and tickets filed with the help desk.</para></section>
 </section>
--- a/doc/arch-design/multi_site/section_prescriptive_examples_multi_site.xml
+++ b/doc/arch-design/multi_site/section_prescriptive_examples_multi_site.xml
@ -1,236 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE section [
 <!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
 %openstack;
 ]>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="prescriptive-example-multisite">
    <?dbhtml stop-chunking?>
    <title>Prescriptive examples</title>
    <para>There are multiple ways to build a multi-site OpenStack
        installation, based on the needs of the intended workloads.
        Below are example architectures based on different
        requirements. These examples are meant as a reference, and not
        a hard and fast rule for deployments. Use the previous
        sections of this chapter to assist in selecting specific
        components and implementations based on specific needs.</para>
    <para>A large content provider needs to deliver content to
        customers that are geographically dispersed. The workload is
        very sensitive to latency and needs a rapid response to
        end-users. After reviewing the user, technical and operational
        considerations, it is determined beneficial to build a number
        of regions local to the customer's edge. Rather than build a
        few large, centralized data centers, the intent of the architecture
        is to provide a pair of small data centers in locations that
        are closer to the customer. In this use
        case, spreading applications out allows for different
        horizontal scaling than a traditional compute workload scale.
        The intent is to scale by creating more copies of the
        application in closer proximity to the users that need it
        most, in order to ensure faster response time to user
        requests. This provider deploys two data centers at each of
        the four chosen regions. The implications of this design are
        based around the method of placing copies of resources in each
        of the remote regions. Swift objects, Glance images, and block
        storage need to be manually replicated into each region.
        This may be beneficial for some systems, such as the case of
        content service, where only some of the content needs to exist
        in some but not all regions. A centralized Keystone is
        recommended to ensure authentication and that access to the
        API endpoints is easily manageable.</para>
    <para>It is recommended that you install an automated DNS system such
        as Designate. Application administrators need a way to
        manage the mapping of which application copy exists in each
        region and how to reach it, unless an external Dynamic DNS system
        is available. Designate assists by making the process automatic
        and by populating the records in each region's zone.</para>
    <para>Telemetry for each region is also deployed, as each region
        may grow differently or be used at a different rate.
        Ceilometer collects each region's meters from each
        of the controllers and reports them back to a central location.
        This is useful both to the end user and the administrator of
        the OpenStack environment. The end user will find this method
        useful, as it makes it possible to determine if certain
        locations are experiencing higher load than others, and take
        appropriate action. Administrators also benefit by
        possibly being able to forecast growth per region, rather than
        expanding the capacity of all regions simultaneously,
        therefore maximizing the cost-effectiveness of the multi-site
        design.</para>
    <para>One of the key decisions of running this infrastructure is
        whether or not to provide a redundancy
        model. Two types of redundancy and high availability models in
        this configuration can be implemented. The first type
        is the availability of central OpenStack
        components. Keystone can be made highly available in three
        central data centers that host the centralized OpenStack
        components. This prevents a loss of any one of the regions
        causing an outage in service. It also has the added benefit of
        being able to run a central storage repository as a primary
        cache for distributing content to each of the regions.</para>
    <para>The second redundancy type is the edge data center itself.
        A second data center in each of the edge regional
        locations houses a second region near the first region. This
        ensures that the application does not suffer degraded
        performance in terms of latency and availability.</para>
      <para><xref linkend="multi-site_customer_edge"/> depicts
        the solution designed to have both a centralized set of core
        data centers for OpenStack services and paired edge data centers:</para>
      <figure xml:id="multi-site_customer_edge">
        <title>Multi-site architecture example</title>
        <mediaobject>
        <imageobject>
            <imagedata contentwidth="6in"
                fileref="../figures/Multi-Site_Customer_Edge.png"/>
        </imageobject>
      </mediaobject>
      </figure>
    <section xml:id="geo-redundant-load-balancing">
      <title>Geo-redundant load balancing</title>
    <para>A large-scale web application has been designed with cloud
        principles in mind. The application is designed to provide
        service to an application store on a 24/7 basis. The company has
        a typical two-tier architecture with a web front-end servicing the
        customer requests, and a NoSQL database back end storing the
        information.</para>
    <para>As of late there have been several outages at a number of major
        public cloud providers due to applications running out of
        a single geographical location. The design therefore should
        mitigate the chance of a single site causing an outage for their
        business.</para>
    <para>The solution would consist of the following OpenStack
        components:</para>
    <itemizedlist>
        <listitem>
            <para>A firewall, switches and load balancers on the
                public facing network connections.</para>
        </listitem>
        <listitem>
            <para>OpenStack Controller services, Networking,
                dashboard, Block Storage and Compute running locally in
                each of the three regions. Identity service, Orchestration
                service, Telemetry service, Image service and
                Object Storage service can be installed centrally, with
                nodes in each of the regions providing a redundant
                OpenStack Controller plane throughout the globe.</para>
        </listitem>
        <listitem>
            <para>OpenStack Compute nodes running the KVM
                hypervisor.</para>
        </listitem>
        <listitem>
            <para>OpenStack Object Storage for serving static objects
                such as images can be used to ensure that all images
                are standardized across all the regions, and
                replicated on a regular basis.</para>
        </listitem>
        <listitem>
            <para>A distributed DNS service available to all
                regions that allows for dynamic update of DNS
                records of deployed instances.</para>
        </listitem>
        <listitem>
            <para>A geo-redundant load balancing service can be used
                to service the requests from the customers based on
                their origin.</para>
        </listitem>
    </itemizedlist>
    <para>An autoscaling heat template can be used to deploy the
        application in the three regions. This template includes:</para>
    <itemizedlist>
        <listitem>
            <para>Web Servers, running Apache.</para>
        </listitem>
        <listitem>
            <para>Appropriate <literal>user_data</literal> to populate the central DNS
                servers upon instance launch.</para>
        </listitem>
        <listitem>
            <para>Appropriate Telemetry alarms that maintain state of
                the application and allow for handling of region or
                instance failure.</para>
        </listitem>
    </itemizedlist>
    <para>Another autoscaling heat template can be used to deploy a
        distributed MongoDB shard over the three locations, with the
        option of storing required data on a globally available swift
        container. According to the usage and load on the database
        server, additional shards can be provisioned according to
        the thresholds defined in Telemetry.</para>
 <!--    <para>The reason that three regions were selected here was because of
        the fear of having abnormal load on a single region in the
        event of a failure. Two data center would have been sufficient
      had the requirements been met.</para>-->
    <para>Two data centers would have been sufficient had the requirements
        been met. But three regions are selected here to avoid abnormal
        load on a single region in the event of a failure.</para>
    <para>Orchestration is used because of the built-in functionality of
        autoscaling and auto healing in the event of increased load.
        Additional configuration management tools, such as Puppet or
        Chef could also have been used in this scenario, but were not
        chosen since Orchestration had the appropriate built-in
        hooks into the OpenStack cloud, whereas the other tools were
        external and not native to OpenStack. In addition, external
        tools were not needed since this deployment scenario was
        straightforward.</para>
    <para>OpenStack Object Storage is used here to serve as a back end for
        the Image service since it is the most suitable solution for a
        globally distributed storage solution with its own
        replication mechanism. Home grown solutions could also have
        been used including the handling of replication, but were not
        chosen, because Object Storage is already an integral part of the
        infrastructure and a proven solution.</para>
    <para>An external load balancing service was used and not the
        LBaaS in OpenStack because the solution in OpenStack is not
        redundant and does not have any awareness of geo location.</para>
      <figure xml:id="multi-site_geo_redundant">
        <title>Multi-site geo-redundant architecture</title>
      <mediaobject>
        <imageobject>
            <imagedata contentwidth="6in"
                fileref="../figures/Multi-site_Geo_Redundant_LB.png"/>
        </imageobject>
      </mediaobject>
     </figure>
    </section>
    <section xml:id="location-local-services">
      <title>Location-local service</title>
    <para>A common use for multi-site OpenStack deployment is
        creating a Content Delivery Network. An application that
        uses a location-local architecture requires low network
        latency and proximity to the user to provide an
        optimal user experience and reduce the cost of bandwidth and
        transit. The content resides on sites closer to the customer,
        instead of a centralized content store that requires utilizing
        higher cost cross-country links.</para>
    <para>This architecture includes a geo-location component
        that places user requests to the closest possible node. In
        this scenario, 100% redundancy of content across every site is
        a goal rather than a requirement, with the intent to
        maximize the amount of content available within a
        minimum number of network hops for end users. Despite
        these differences, the storage replication configuration has
        significant overlap with that of a geo-redundant load
        balancing use case.</para>
      <para>In <xref linkend="multi-site_shared_shared_keystone"/>,
        a location-aware application utilizing this multi-site OpenStack
        installation would launch web server or content-serving
        instances on the compute cluster in each site. Requests
        from clients are first sent to a global services load balancer
        that determines the location of the client, then routes the
        request to the closest OpenStack site where the application
        completes the request.</para>
      <figure xml:id="multi-site_shared_shared_keystone">
        <title>Multi-site shared keystone architecture</title>
      <mediaobject>
        <imageobject>
            <imagedata contentwidth="6in"
                fileref="../figures/Multi-Site_shared_keystone1.png"/>
        </imageobject>
      </mediaobject>
     </figure>
    </section>
 </section>
--- a/doc/arch-design/multi_site/section_tech_considerations_multi_site.xml
+++ b/doc/arch-design/multi_site/section_tech_considerations_multi_site.xml
@ -1,176 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="technical-considerations-multi-site">
    <?dbhtml stop-chunking?>
    <title>Technical considerations</title>
    <para>There are many technical considerations to take into account
        with regard to designing a multi-site OpenStack
        implementation. An OpenStack cloud can be designed in a
        variety of ways to handle individual application needs. A
        multi-site deployment has additional challenges compared
        to single site installations and therefore is a more
        complex solution.</para>
    <para>When determining capacity options be sure to take into
        account not just the technical issues, but also the economic
        or operational issues that might arise from specific
        decisions.</para>
    <para>Inter-site link capacity describes the capabilities of the
        connectivity between the different OpenStack sites. This
        includes parameters such as bandwidth, latency, whether or not
        a link is dedicated, and any business policies applied to the
        connection. The capability and number of the links between
        sites determine what kind of options are available for
        deployment. For example, if two sites have a pair of
        high-bandwidth links available between them, it may be wise to
        configure a separate storage replication network between the
        two sites to support a single Swift endpoint and a shared
        Object Storage capability between them. An example of this
        technique, as well as a configuration walk-through, is
        available at <link
        xlink:href="http://docs.openstack.org/developer/swift/replication_network.html#dedicated-replication-network">http://docs.openstack.org/developer/swift/replication_network.html#dedicated-replication-network</link>.
        Another option in this scenario is to build a dedicated set of
        tenant private networks across the secondary link, using
        overlay networks with a third party mapping the site overlays
        to each other.</para>
    <para>The capacity requirements of the links between sites are
        driven by application behavior. If the link latency is
        too high, certain applications that use a large number of
        small packets, for example RPC calls, may encounter issues
        communicating with each other or operating properly.
        Additionally, OpenStack may encounter similar types of issues.
        To mitigate this, Identity service call timeouts can be
        tuned to prevent issues authenticating against a central
        Identity service.</para>
    <para>Another network capacity consideration for a multi-site
        deployment is the number and performance of overlay networks
        available for tenant networks. If using shared tenant networks
        across zones, it is imperative that an external overlay manager
        or controller be used to map these overlays together. It is
        necessary to ensure the number of possible IDs between the zones
        is identical.</para>
      <note>
        <para>As of the Kilo release, OpenStack Networking was not
          capable of managing tunnel IDs across installations. So if
          one site runs out of IDs, but another does not, that tenant's
          network is unable to reach the other site.</para>
      </note>
    <para>Capacity can take other forms as well. The ability for a
        region to grow depends on scaling out the number of available
        compute nodes. This topic is covered in greater detail in the
        section for compute-focused deployments. However, it may be
        necessary to grow cells in an individual region, depending on
        the size of your cluster and the ratio of virtual machines per
        hypervisor.</para>
    <para>A third form of capacity comes in the multi-region-capable
        components of OpenStack. Centralized Object Storage is capable
        of serving objects through a single namespace across multiple
        regions. Since this works by accessing the object store through
        swift proxy, it is possible to overload the proxies. There are
        two options available to mitigate this issue:</para>
      <itemizedlist>
        <listitem>
          <para>Deploy a large number of swift proxies. The drawback is
            that the proxies are not load-balanced and a large file
            request could continually hit the same proxy.</para>
        </listitem>
        <listitem>
          <para>Add a caching HTTP proxy and load balancer in front of
            the swift proxies. Since swift objects are returned to the
            requester via HTTP, this load balancer would alleviate the
            load required on the swift proxies.</para>
         </listitem>
       </itemizedlist>
    <section xml:id="utilization-multi-site"><title>Utilization</title>
    <para>While constructing a multi-site OpenStack environment is the
        goal of this guide, the real test is whether an application
        can utilize it.</para>
    <para>The Identity service is normally the first interface for
        OpenStack users and is required for almost all major operations
        within OpenStack. Therefore, it is important that you provide users
        with a single URL for Identity service authentication, and
        document the configuration of regions within the Identity service.
        Each of the sites defined in your installation is considered
        to be a region in Identity nomenclature. This is important for
        the users, as it is required to define the region name when
        providing actions to an API endpoint or in the dashboard.</para>
    <para>Load balancing is another common issue with multi-site
        installations. While it is still possible to run HAProxy
        instances with Load-Balancer-as-a-Service, these are defined
        to a specific region. Some applications can manage this using
        internal mechanisms. Other applications may require the
        implementation of an external system, including global services
        load balancers or anycast-advertised DNS.</para>
    <para>Depending on the storage model chosen during site design,
        storage replication and availability are also a concern
        for end-users. If an application can support regions, then it
        is possible to keep the object storage system separated by region.
        In this case, users who want to have an object available to
        more than one region need to perform cross-site replication.
        However, with a centralized swift proxy, the user may need to
        benchmark the replication timing of the Object Storage back end.
        Benchmarking allows the operational staff to provide users with
        an understanding of the amount of time required for a stored or
        modified object to become available to the entire environment.</para>
      </section>
    <section xml:id="performance"><title>Performance</title>
    <para>Determining the performance of a multi-site installation
        involves considerations that do not come into play in a
        single-site deployment. Being a distributed deployment,
        performance in multi-site deployments may be affected in certain
        situations.</para>
    <para>Since multi-site systems can be geographically separated,
        there may be greater latency or jitter when communicating across
        regions. This can especially impact systems like the OpenStack
        Identity service when making authentication attempts from regions
        that do not contain the centralized Identity implementation. It
        can also affect applications which rely on Remote Procedure Call (RPC)
        for normal operation. An example of this can be seen in high
        performance computing workloads.</para>
    <para>Storage availability can also be impacted by the
        architecture of a multi-site deployment. A centralized Object
        Storage service requires more time for an object to be
        available to instances locally in regions where the object was
        not created. Some applications may need to be tuned to account
        for this effect. Block Storage does not currently have a
        method for replicating data across multiple regions, so
        applications that depend on available block storage need
        to manually cope with this limitation by creating duplicate
        block storage entries in each region.</para>
      </section>
    <section xml:id="openstack-components_multi-site">
      <title>OpenStack components</title>
    <para>Most OpenStack installations require a bare minimum set of
        pieces to function. These include the OpenStack Identity
        (keystone) for authentication, OpenStack Compute
        (nova) for compute, OpenStack Image service (glance) for image
        storage, OpenStack Networking (neutron) for networking, and
        potentially an object store in the form of OpenStack Object
        Storage (swift). Deploying a multi-site installation also demands extra
        components in order to coordinate between regions. A centralized
        Identity service is necessary to provide the single authentication
        point. A centralized dashboard is also recommended to provide a
        single login point and a mapping to the API and CLI
        options available. A centralized Object Storage service may also
        be used, but will require the installation of the swift proxy
        service.</para>
    <para>It may also be helpful to install a few extra options in
        order to facilitate certain use cases. For example,
        installing Designate may assist in automatically generating
        DNS domains for each region with an automatically-populated
        zone full of resource records for each instance. This
        facilitates using DNS as a mechanism for determining which
        region will be selected for certain applications.</para>
    <para>Another useful tool for managing a multi-site installation
        is Orchestration (heat). The Orchestration service allows the
        use of templates to define a set of instances to be launched
        together or for scaling existing sets. It can also be used to
        set up matching or differentiated groupings based on
        regions. For instance, if an application requires an equally
        balanced number of nodes across sites, the same heat template
        can be used to cover each site with small alterations to only
        the region name.</para>
    </section>
 </section>
--- a/doc/arch-design/multi_site/section_user_requirements_multi_site.xml
+++ b/doc/arch-design/multi_site/section_user_requirements_multi_site.xml
@ -1,176 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="user-requirements-multi-site">
    <?dbhtml stop-chunking?>
    <title>User requirements</title>
    <section xml:id="workload-characteristics">
      <title>Workload characteristics</title>
    <para>An understanding of the expected workloads for a desired
        multi-site environment and use case is an important factor in
        the decision-making process. In this context, <literal>workload</literal>
        refers to the way the systems are used. A workload could be a
        single application or a suite of applications that work together.
        It could also be a duplicate set of applications that need to
        run in multiple cloud environments. Often in a multi-site deployment,
        the same workload will need to work identically in more than one
        physical location.</para>
    <para>This multi-site scenario likely includes one or more of the
        other scenarios in this book with the additional requirement
        of having the workloads in two or more locations. The
        following are some possible scenarios:</para>
    <para>For many use cases the proximity of the user to their
        workloads has a direct influence on the performance of the
        application and therefore should be taken into consideration
        in the design. Certain applications require zero to minimal
        latency that can only be achieved by deploying the cloud in
        multiple locations. These locations could be in different data
        centers, cities, countries or geographical regions, depending
        on the user requirement and location of the users.</para></section>
    <section xml:id="consistency-images-templates-across-sites">
        <title>Consistency of images and templates across different
        sites</title>
    <para>It is essential that the deployment of instances is
        consistent across the different sites and built
        into the infrastructure. If the OpenStack Object Storage is used as
        a back end for the Image service, it is possible to create repositories
        of consistent images across multiple sites. Having central
        endpoints with multiple storage nodes allows consistent centralized
        storage for every site.</para>
      <para>Not using a centralized object store increases the operational
        overhead of maintaining a consistent image library. This
        could include development of a replication mechanism to handle
        the transport of images and the changes to the images across
        multiple sites.</para></section>
    <section xml:id="high-availability-multi-site">
      <title>High availability</title>
    <para>If high availability is a requirement to provide continuous
        infrastructure operations, a basic requirement of high
        availability should be defined.</para>
    <para>The OpenStack management components need to have a basic and
        minimal level of redundancy. The simplest example is that the loss
        of any single site should have minimal impact on the
        availability of the OpenStack services.</para>
    <para>The <link
        xlink:href="http://docs.openstack.org/ha-guide/"><citetitle>OpenStack
        High Availability Guide</citetitle></link>
        contains more information on how to provide redundancy for the
        OpenStack components.</para>
    <para>Multiple network links should be deployed between sites to
        provide redundancy for all components. This includes storage
        replication, which should be isolated to a dedicated network
        or VLAN with the ability to assign QoS to control the
        replication traffic or provide priority for this traffic. Note
        that if the data store is highly changeable, the network
        requirements could have a significant effect on the
        operational cost of maintaining the sites.</para>
    <para>The ability to maintain object availability in both sites
        has significant implications on the object storage design and
        implementation. It also has a significant impact on the
        WAN network design between the sites.</para>
    <para>Connecting more than two sites increases the challenges and
        adds more complexity to the design considerations. Multi-site
        implementations require planning to address the additional
        topology used for internal and external connectivity. Some options
        include full mesh topology, hub-and-spoke, spine-leaf, and 3D torus.</para>
    <para>If applications running in a cloud are not cloud-aware, there
        should be clear measures and expectations to define what the
        infrastructure can and cannot support. An example would be
        shared storage between sites. It is possible; however, such a
        solution is not native to OpenStack and requires a third-party
        hardware vendor to fulfill such a requirement. Another example
        can be seen in applications that are able to consume resources
        in object storage directly. These applications need to be
        cloud-aware to make good use of an OpenStack Object
        Store.</para></section>
    <section xml:id="application-readiness">
      <title>Application readiness</title>
    <para>Some applications are tolerant of the lack of synchronized
        object storage, while others may need those objects to be
        replicated and available across regions. Understanding how
        the cloud implementation impacts new and existing applications
        is important for risk mitigation, and the overall success of a
        cloud project. Applications may have to be written or rewritten
        for an infrastructure with little to no redundancy, or with the
        cloud in mind.</para></section>
    <section xml:id="cost-multi-site">
      <title>Cost</title>
    <para>A greater number of sites increases cost and complexity for a
        multi-site deployment. Costs can be broken down into the following
        categories:</para>
    <itemizedlist>
        <listitem>
            <para>Compute resources</para>
        </listitem>
        <listitem>
            <para>Networking resources</para>
        </listitem>
        <listitem>
            <para>Replication</para>
        </listitem>
        <listitem>
            <para>Storage</para>
        </listitem>
        <listitem>
            <para>Management</para>
        </listitem>
        <listitem>
            <para>Operational costs</para>
        </listitem>
    </itemizedlist></section>
    <section xml:id="site-loss-and-recovery">
      <title>Site loss and recovery</title>
    <para>Outages can cause partial or full loss of site functionality.
      Strategies should be implemented to understand and plan for recovery
      scenarios.</para>
    <itemizedlist>
        <listitem>
            <para>The deployed applications need to continue to
                function and, more importantly, you must consider the
                impact on the performance and reliability of the application
                when a site is unavailable.</para>
        </listitem>
        <listitem>
            <para>It is important to understand what happens to the
                replication of objects and data between the sites when
                a site goes down. If this causes queues to start
                building up, consider how long these queues can
                safely exist until an error occurs.</para>
        </listitem>
        <listitem>
          <para>After an outage, ensure the method for resuming proper
            operations of a site is implemented when it comes back online.
            We recommend you architect the recovery to avoid race conditions.</para>
        </listitem>
    </itemizedlist></section>
    <section xml:id="compliance-and-geo-location-multi-site">
      <title>Compliance and geo-location</title>
    <para>An organization may have certain legal obligations and
        regulatory compliance measures which could require certain
        workloads or data to not be located in certain regions.</para></section>
    <section xml:id="auditing-multi-site">
      <title>Auditing</title>
    <para>A well thought-out auditing strategy is important in order
        to be able to quickly track down issues. Keeping track of
        changes made to security groups and tenant changes can be
        useful in rolling back the changes if they affect production.
        For example, if all security group rules for a tenant
        disappeared, the ability to quickly track down the issue would
        be important for operational and legal reasons.</para></section>
    <section xml:id="separation-of-duties">
      <title>Separation of duties</title>
    <para>A common requirement is to define different roles for the
        different cloud administration functions. An example would be
        a requirement to segregate the duties and permissions by
        site.</para></section>
    <section xml:id="authentication-between-sites">
        <title>Authentication between sites</title>
    <para>It is recommended to have a single authentication domain
        rather than a separate implementation for each and every
        site. This requires an authentication mechanism that is highly
        available and distributed to ensure continuous operation.
        Authentication server locality might be required and should be
        planned for.</para></section>
 </section>
--- a/doc/arch-design/network_focus/section_architecture_network_focus.xml
+++ b/doc/arch-design/network_focus/section_architecture_network_focus.xml
@ -1,184 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="architecture-network-focus">
    <title>Architecture</title>
    <para>Network-focused OpenStack architectures have many similarities to
      other OpenStack architecture use cases. There are several factors
      to consider when designing for a network-centric or network-heavy
      application environment.</para>
    <para>Networks exist to serve as a medium of transporting data between
      systems. It is inevitable that an OpenStack design has inter-dependencies
      with non-network portions of OpenStack as well as on external systems.
      Depending on the specific workload, there may be major interactions with
      storage systems both within and external to the OpenStack environment.
      For example, in the case of a content delivery network, there is twofold
      interaction with storage. Traffic flows to and from the storage array for
      ingesting and serving content in a north-south direction. In addition,
      there is replication traffic flowing in an east-west direction.</para>
    <para>Compute-heavy workloads may also induce interactions with the
      network. Some high performance compute applications require network-based
      memory mapping and data sharing and, as a result, induce a higher network
      load when they transfer results and data sets. Others may be highly
      transactional and issue transaction locks, perform their functions, and
      revoke transaction locks at high rates. This also has an impact on the
      network performance.</para>
    <para>Some network dependencies are external to OpenStack. While
      OpenStack Networking is capable of providing network ports, IP addresses,
      some level of routing, and overlay networks, there are some other
      functions that it cannot provide. For many of these, you may require
      external systems or equipment to fill in the functional gaps. Hardware
      load balancers are an example of equipment that may be necessary to
      distribute workloads or offload certain functions. OpenStack Networking
      provides a tunneling feature; however, it is constrained to a
      Networking-managed region. If the need arises to extend a tunnel beyond
      the OpenStack region to either another region or an external system,
      implement the tunnel itself outside OpenStack or use a tunnel management
      system to map the tunnel or overlay to an external tunnel.
    </para>
    <para>
      Depending on the selected design, Networking itself might not
      support the required <glossterm baseform="Layer-3 network">layer-3
      network</glossterm> functionality. If you choose to use the
      provider networking mode without running the layer-3 agent, you
      must install an external router to provide layer-3 connectivity
      to outside systems.
    </para>
    <para>Interaction with orchestration services is inevitable in
      larger-scale deployments. The Orchestration service is capable of
      allocating network resources defined in templates to map to tenant
      networks and for port creation, as well as allocating floating IPs.
      If there is a requirement to define and manage network resources when
      using orchestration, we recommend that the design include the
      Orchestration service to meet the demands of users.</para>
    <section xml:id="design-impacts">
      <title>Design impacts</title>
      <para>A wide variety of factors can affect a network-focused OpenStack
        architecture. While there are some considerations shared with a general
        use case, specific workloads related to network requirements influence
        network design decisions.</para>
      <para>One decision includes whether or not to use Network Address
        Translation (NAT) and where to implement it. If there is a requirement
        for floating IPs instead of public fixed addresses then you must use
        NAT. An example of this is a DHCP relay that must know the IP of the
        DHCP server. In these cases it is easier to automate the infrastructure
        to apply the target IP to a new instance rather than to reconfigure
        legacy or external systems for each new instance.</para>
      <para>NAT for floating IPs managed by Networking resides within the
        hypervisor but there are also versions of NAT that may be running
        elsewhere. If there is a shortage of IPv4 addresses there are two common
        methods to mitigate this externally to OpenStack. The first is to run a
        load balancer either within OpenStack as an instance, or use an external
        load balancing solution. In the internal scenario, Networking's
        Load-Balancer-as-a-Service (LBaaS) can manage load balancing
        software, for example HAProxy. This is specifically to manage the
        Virtual IP (VIP) while a dual-homed connection from the HAProxy instance
        connects the public network with the tenant private network that hosts
        all of the content servers. In the external scenario, a load balancer
        needs to serve the VIP and also connect to the tenant overlay
        network through external means or through private addresses.</para>
      <para>Another kind of NAT that may be useful is protocol NAT. In some
        cases it may be desirable to use only IPv6 addresses on instances and
        operate either an instance or an external service to provide a NAT-based
        transition technology such as NAT64 and DNS64. This provides the ability
        to have a globally routable IPv6 address while only consuming IPv4
        addresses as necessary or in a shared manner.</para>
      <para>Application workloads affect the design of the underlying network
        architecture. If a workload requires network-level redundancy, the
        routing and switching architecture have to accommodate this. There
        are differing methods for providing this that are dependent on the
        selected network hardware, the performance of the hardware, and which
        networking model you deploy. Examples include
        Link aggregation (LAG) and Hot Standby Router Protocol (HSRP). Also
        consider whether to deploy OpenStack Networking or
        legacy networking (nova-network), and which plug-in to select for
        OpenStack Networking. If using an external system, configure Networking
        to run <glossterm baseform="Layer-2 network">layer 2</glossterm>
        with a provider network configuration. For example, implement HSRP
        to terminate layer-3 connectivity.</para>
      <para>Depending on the workload, overlay networks may not be the best
        solution. Where application network connections are
        small, short lived, or bursty, running a dynamic overlay can generate
        as much bandwidth as the packets it carries. It also can induce enough
        latency to cause issues with certain applications. There is an impact
        to the device generating the overlay which, in most installations,
        is the hypervisor. This causes performance degradation on packet
        per second and connection per second rates.</para>
      <para>Overlays also come with a secondary option that may not be
        appropriate to a specific workload. While all of them operate in full
        mesh by default, there might be good reasons to disable this function
        because it may cause excessive overhead for some workloads. Conversely,
        other workloads operate without issue. For example, most web services
        applications do not have major issues with a full mesh overlay network,
        while some network monitoring tools or storage replication workloads
        have performance issues with throughput or excessive broadcast
        traffic.</para>
      <para>Many people overlook an important design decision: the choice of
        layer-3 protocols. While OpenStack was initially built with only IPv4
        support, Networking now supports IPv6 and dual-stacked networks.
        Some workloads are possible through the use of IPv6 and IPv6 to IPv4
        reverse transition mechanisms such as NAT64 and DNS64 or
        <glossterm>6to4</glossterm>.
        This alters the requirements for any address plan as single-stacked and
        transitional IPv6 deployments can alleviate the need for IPv4
        addresses.</para>
      <para>OpenStack has limited support for
        dynamic routing; however, there are a number of options available by
        incorporating third party solutions to implement routing within the
        cloud including network equipment, hardware nodes, and instances. Some
        workloads perform well with nothing more than static routes and default
        gateways configured at the layer-3 termination point. In most cases this
        is sufficient; however, some cases require the addition of at least one
        type of dynamic routing protocol if not multiple protocols. Having a
        form of interior gateway protocol (IGP) available to the instances
        inside an OpenStack installation opens up the possibility of use cases
        for anycast route injection for services that need to use it as a
        geographic location or failover mechanism. Other applications may wish
        to directly participate in a routing protocol, either as a passive
        observer, as in the case of a looking glass, or as an active participant
        in the form of a route reflector. Since an instance might have a large
        amount of compute and memory resources, it is trivial to hold an entire
        unpartitioned routing table and use it to provide services such as
        network path visibility to other applications or as a monitoring
        tool.</para>
      <para>Path maximum transmission unit (MTU) failures are lesser known but
        harder to diagnose. The MTU must be large enough to handle normal
        traffic, overhead from an overlay network, and the desired layer-3
        protocol. Adding externally built tunnels reduces the MTU packet size.
        In this case, you must pay attention to the fully
        calculated MTU size because some systems ignore or
        drop path MTU discovery packets.</para>
    </section>
    <section xml:id="tunables">
      <title>Tunable networking components</title>
      <para>Consider configurable networking components related to an
        OpenStack architecture design when designing for network intensive
        workloads that include MTU and QoS. Some workloads require a larger MTU
        than normal due to the transfer of large blocks of data.
        When providing network service for applications such as video
        streaming or storage replication, we recommend that you configure
        both OpenStack hardware nodes and the supporting network equipment
        for jumbo frames where possible. This allows for better use of
        available bandwidth. Configure jumbo frames
        across the complete path the packets traverse. If one network
        component is not capable of handling jumbo frames then the entire
        path reverts to the default MTU.</para>
      <para>Quality of Service (QoS) also has a great impact on network
        intensive workloads as it provides instant service to packets which
        have a higher priority due to the impact of poor
        network performance. In applications such as Voice over IP (VoIP),
        differentiated services code points are a near requirement for proper
        operation. You can also use QoS in the opposite direction for mixed
        workloads to prevent low priority but high bandwidth applications,
        for example backup services, video conferencing, or file sharing,
        from blocking bandwidth that is needed for the proper operation of
        other workloads. It is possible to tag file storage traffic as a
        lower class, such as best effort or scavenger, to allow the higher
        priority traffic through. In cases where regions within a cloud might
        be geographically distributed it may also be necessary to plan
        accordingly to implement WAN optimization to combat latency or
        packet loss.</para>
    </section>
 </section>
--- a/doc/arch-design/network_focus/section_operational_considerations_network_focus.xml
+++ b/doc/arch-design/network_focus/section_operational_considerations_network_focus.xml
@ -1,68 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="operational-considerations-networking-focus">
    <?dbhtml stop-chunking?>
    <title>Operational considerations</title>
    <para>Network-focused OpenStack clouds have a number of operational
      considerations that influence the selected design, including:</para>
    <itemizedlist>
      <listitem>
        <para>Dynamic routing of static routes</para>
      </listitem>
      <listitem>
        <para>Service level agreements (SLAs)</para>
      </listitem>
      <listitem>
        <para>Ownership of user management</para>
      </listitem>
    </itemizedlist>
    <para>An initial network consideration is the selection of a telecom
      company or transit provider.</para>
    <para>Make additional design decisions about monitoring and alarming.
      This can be an internal responsibility or the responsibility of the
      external provider. In the case of using an external provider, service
      level agreements (SLAs) likely apply. In addition, other operational
      considerations such as bandwidth, latency, and jitter can be part of an
      SLA.</para>
    <para>Consider the ability to upgrade the infrastructure. As demand for
      network resources increases, operators add additional IP address blocks
      and add additional bandwidth capacity. In addition, consider managing
      hardware and software life cycle events, for example upgrades,
      decommissioning, and outages, while avoiding service interruptions for
      tenants.</para>
    <para>Factor maintainability into the overall network design. This
      includes the ability to manage and maintain IP addresses as well as the
      use of overlay identifiers including VLAN tag IDs, GRE tunnel IDs, and
      MPLS tags. As an example, if you need to change all of the IP
      addresses on a network, a process known as renumbering, then the design
      must support this function.</para>
    <para>Address network-focused applications when considering certain
      operational realities. For example, consider the impending exhaustion
      of IPv4 addresses, the migration to IPv6, and the use of private
      networks to segregate different types of traffic that an application
      receives or generates. In the case of IPv4 to IPv6 migrations,
      applications should follow best practices for storing IP addresses.
      We recommend you avoid relying on IPv4 features that did not carry over
      to the IPv6 protocol or have differences in implementation.</para>
    <para>To segregate traffic, allow applications to create a private tenant
      network for database and storage network traffic. Use a public network
      for services that require direct client access from the internet. Upon
      segregating the traffic, consider quality of service (QoS) and security
      to ensure each network has the required level of service.</para>
    <para>Finally, consider the routing of network traffic.
      For some applications, develop a complex policy framework for
      routing. To create a routing policy that satisfies business requirements,
      consider the economic cost of transmitting traffic over expensive links
      versus cheaper links, in addition to bandwidth, latency, and jitter
      requirements.</para>
    <para>Additionally, consider how to respond to network events. As an
      example, how load transfers from one link to another during a
      failure scenario could be a factor in the design. If you do not plan
      network capacity correctly, failover traffic could overwhelm other ports
      or network links and create a cascading failure scenario. In this case,
      traffic that fails over to one link overwhelms that link and then moves
      to the subsequent links until all network traffic stops.</para>
 </section>
--- a/doc/arch-design/network_focus/section_prescriptive_examples_network_focus.xml
+++ b/doc/arch-design/network_focus/section_prescriptive_examples_network_focus.xml
@ -1,209 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="prescriptive-example-large-scale-web-app">
    <?dbhtml stop-chunking?>
    <title>Prescriptive examples</title>
    <para>An organization designs a large-scale web application with cloud
        principles in mind. The application scales
        horizontally in a bursting fashion and generates a high
        instance count. The application requires an SSL connection to
        secure data and must not lose connection state to individual
        servers.</para>
    <para>The figure below depicts an example design for this workload.
        In this example, a hardware load balancer provides SSL offload
        functionality and connects
        to tenant networks in order to reduce address consumption.
        This load balancer links to the routing architecture as it
        services the VIP for the application. The router and load
        balancer use the GRE tunnel ID of the
        application's tenant network and an IP address within
        the tenant subnet but outside of the address pool. This is to
        ensure that the load balancer can communicate with the
        application's HTTP servers without requiring the consumption
        of a public IP address.</para>
    <para>Because sessions persist until closed, the routing and
        switching architecture provides high availability.
        Switches mesh to each hypervisor and each other, and
        also provide an MLAG implementation to ensure that layer-2
        connectivity does not fail. Routers use VRRP
        and fully mesh with switches to ensure layer-3 connectivity.
        Since GRE provides an overlay network, Networking is present
        and uses the Open vSwitch agent in GRE tunnel
        mode. This ensures all devices can reach all other devices and
        that you can create tenant networks for private addressing
        links to the load balancer.
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Network_Web_Services1.png"
            />
        </imageobject>
    </mediaobject></para>
    <para>A web service architecture has many options and optional
        components. Due to this, it can fit into a large number of
        other OpenStack designs. A few key components, however, need
        to be in place to handle the nature of most web-scale
        workloads. You require the following components:</para>
    <itemizedlist>
        <listitem>
            <para>OpenStack Controller services (Image, Identity,
                Networking and supporting services such as MariaDB and
                RabbitMQ)</para>
        </listitem>
        <listitem>
            <para>OpenStack Compute running KVM hypervisor</para>
        </listitem>
        <listitem>
            <para>OpenStack Object Storage</para>
        </listitem>
        <listitem>
            <para>Orchestration service</para>
        </listitem>
        <listitem>
            <para>Telemetry service</para>
        </listitem>
    </itemizedlist>
    <para>Beyond the normal Identity, Compute, Image service, and Object
        Storage components, we recommend the Orchestration service
        component to handle the proper scaling of workloads to adjust to
        demand. Due to the requirement for auto-scaling,
        the design includes the Telemetry service. Web services
        tend to be bursty in load, have very defined peak and valley
        usage patterns and, as a result, benefit from automatic scaling
        of instances based upon traffic. At a network level, a split
        network configuration works well with databases residing on
        private tenant networks since these do not emit a large quantity
        of broadcast traffic and may need to interconnect to some
        databases for content.
    </para>
    <section xml:id="load-balancing">
      <title>Load balancing</title>
      <para>Load balancing spreads requests across multiple instances.
        This workload scales well horizontally across large numbers of
        instances. This enables instances to run without publicly
        routed IP addresses and instead to rely on the load
        balancer to provide a globally reachable service.
        Many of these services do not require
        direct server return. This aids in address planning and
        utilization at scale since only the virtual IP (VIP) must be
        public.</para>
    </section>
    <section xml:id="overlay-networks">
      <title>Overlay networks</title>
      <para>
        The overlay functionality design includes OpenStack Networking
        in Open vSwitch GRE tunnel mode.
        In this case, the layer-3 external routers pair with
        VRRP, and switches pair with an implementation of
        MLAG to ensure that you do not lose connectivity with
        the upstream routing infrastructure.
      </para>
    </section>
    <section xml:id="performance-tuning">
      <title>Performance tuning</title>
      <para>Network level tuning for this workload is minimal.
        Quality-of-Service (QoS) applies to these workloads
        for a middle ground Class Selector depending on existing
        policies. It is higher than a best effort queue but lower
        than an Expedited Forwarding or Assured Forwarding queue.
        Since this type of application generates larger packets with
        longer-lived connections, you can optimize bandwidth utilization
        for long duration TCP. Normal bandwidth planning
        applies here with regard to benchmarking a session's usage
        multiplied by the expected number of concurrent sessions with
        overhead.</para>
    </section>
    <section xml:id="network-functions">
      <title>Network functions</title>
      <para>Network functions is a broad category but encompasses
        workloads that support the rest of a system's network. These
        workloads tend to consist of large amounts of small packets
        that are very short lived, such as DNS queries or SNMP traps.
        These messages need to arrive quickly and do not deal with
        packet loss as there can be a very large volume of them. There
        are a few extra considerations to take into account for this
        type of workload and this can change a configuration all the
        way to the hypervisor level. For an application that generates
        10 TCP sessions per user with an average bandwidth of 512
        kilobits per second per flow and expected user count of ten
        thousand concurrent users, the expected bandwidth plan is
        approximately 51.2 gigabits per second.</para>
    <para>The supporting network for this type of configuration needs
        to have a low latency and evenly distributed availability.
        This workload benefits from having services local to the
        consumers of the service. Use a multi-site approach as
        well as deploying many copies of the application to handle
        load as close as possible to consumers. Since these
        applications function independently, they do not warrant
        running overlays to interconnect tenant networks. Overlays
        also have the drawback of performing poorly with rapid flow
        setup and may incur too much overhead with large quantities of
        small packets and therefore we do not recommend them.</para>
    <para>QoS is desirable for some workloads to ensure delivery. DNS
        has a major impact on the load times of other services and
        needs to be reliable and provide rapid responses. Configure rules
        in upstream devices to apply a higher Class
        Selector to DNS to ensure faster delivery or a better spot in
        queuing algorithms.</para>
    </section>
    <section xml:id="cloud-storage">
      <title>Cloud storage</title>
      <para>Another common use case for OpenStack environments is providing
        a cloud-based file storage and sharing service. You might
        consider this a storage-focused use case, but its network-side
        requirements make it a network-focused use case.</para>
      <para>For example, consider a cloud backup application. This workload
        has two specific behaviors that impact the network. Because this
        workload is an externally-facing service and an
        internally-replicating application, it has both <glossterm
        baseform="north-south traffic">north-south</glossterm> and
        <glossterm>east-west traffic</glossterm>
        considerations:</para>
    <variablelist>
      <varlistentry>
        <term>north-south traffic</term>
        <listitem>
          <para>When a user uploads and stores content, that content moves
            into the OpenStack installation. When users download this
            content, the content moves out from the OpenStack
            installation. Because this service operates primarily
            as a backup, most of the traffic moves southbound into the
            environment.  In this situation, it benefits you to
            configure a network to be asymmetrically downstream
            because the traffic that enters the OpenStack installation
            is greater than the traffic that leaves the installation.</para>
        </listitem>
      </varlistentry>
      <varlistentry>
        <term>east-west traffic</term>
        <listitem>
          <para>Likely to be fully symmetric. Because replication
            originates from any node and might target multiple other
            nodes algorithmically, it is less likely for this traffic
            to have a larger volume in any specific direction. However,
            this traffic might interfere with north-south traffic.</para>
        </listitem>
      </varlistentry>
    </variablelist>
    <mediaobject>
        <imageobject>
            <imagedata contentwidth="4in"
                fileref="../figures/Network_Cloud_Storage2.png"
            />
        </imageobject>
    </mediaobject>
    <para>This application prioritizes the north-south traffic over
      east-west traffic: the north-south traffic involves
      customer-facing data.</para>
    <para>The network design in this case is less dependent on
      availability and more dependent on being able to handle high
      bandwidth. As a direct result, it is beneficial to forgo
      redundant links in favor of bonding those connections. This
      increases available bandwidth. It is also beneficial to
      configure all devices in the path, including OpenStack, to
      generate and pass jumbo frames.</para>
  </section>
 </section>
--- a/doc/arch-design/network_focus/section_tech_considerations_network_focus.xml
+++ b/doc/arch-design/network_focus/section_tech_considerations_network_focus.xml
@ -1,462 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="technical-considerations-network-focus">
    <?dbhtml stop-chunking?>
    <title>Technical considerations</title>
    <para>When you design an OpenStack network architecture, you must
        consider layer-2 and layer-3 issues. Layer-2
        decisions involve those made at the data-link layer, such as
        the decision to use Ethernet versus Token Ring. Layer-3 decisions
        involve those made about the protocol layer and the point when
        IP comes into the picture. As an example, a completely
        internal OpenStack network can exist at layer 2 and ignore
        layer 3. In order for any traffic to go outside of
        that cloud, to another network, or to the Internet, however, you must
        use a layer-3 router or switch.</para>
    <para>The past few years have seen two competing trends in
      networking. One trend leans towards building data center network
      architectures based on layer-2 networking. Another trend treats
      the cloud environment essentially as a miniature version of the
      Internet. This approach is radically different from the network
      architecture approach in the staging environment:
      the Internet only uses layer-3 routing rather than
      layer-2 switching.</para>
    <para>A network designed on layer-2 protocols has advantages over one
      designed on layer-3 protocols. In spite of the difficulties of
      using a bridge to perform the network role of a router, many
      vendors, customers, and service providers choose to use Ethernet
      in as many parts of their networks as possible. The benefits of
      selecting a layer-2 design are:</para>
    <itemizedlist>
        <listitem>
            <para>Ethernet frames contain all the essentials for
                networking. These include, but are not limited to,
                globally unique source addresses, globally unique
                destination addresses, and error control.</para>
        </listitem>
        <listitem>
            <para>Ethernet frames can carry any kind of packet.
                Networking at layer 2 is independent of the layer-3
                protocol.</para>
        </listitem>
        <listitem>
            <para>Adding more layers to the Ethernet frame only slows
                the networking process down. This is known as 'nodal
                processing delay'.</para>
        </listitem>
        <listitem>
            <para>You can add adjunct networking features, for
                example class of service (CoS) or multicasting, to
                Ethernet as readily as IP networks.</para>
        </listitem>
        <listitem>
            <para>VLANs are an easy mechanism for isolating
                networks.</para>
        </listitem>
    </itemizedlist>
    <para>Most information starts and ends inside Ethernet frames.
        Today this applies to data, voice (for example, VoIP), and
        video (for example, web cameras). The concept is that, if you can
        perform more of the end-to-end transfer of information from
        a source to a destination in the form of Ethernet frames, the network
        benefits more from the advantages of Ethernet.
        Although it is not a substitute for IP networking, networking at
        layer 2 can be a powerful adjunct to IP networking.</para>
    <para>
      Layer-2 Ethernet usage has these advantages over layer-3 IP
      network usage:
    </para>
    <itemizedlist>
      <listitem>
        <para>Speed</para>
      </listitem>
      <listitem>
          <para>Reduced overhead of the IP hierarchy.</para>
      </listitem>
      <listitem>
        <para>No need to keep track of address configuration as systems
          move around. Whereas the simplicity of layer-2
          protocols might work well in a data center with hundreds
          of physical machines, cloud data centers have the
          additional burden of needing to keep track of all virtual
          machine addresses and networks. In these data centers, it
          is not uncommon for one physical node to support 30-40
          instances.</para>
      </listitem>
    </itemizedlist>
    <important>
      <para>Networking at the frame level says nothing
        about the presence or absence of IP addresses at the packet
        level. Almost all ports, links, and devices on a network of
        LAN switches still have IP addresses, as do all the source and
        destination hosts. There are many reasons for the continued
        need for IP addressing. The largest one is the need to manage
        the network. A device or link without an IP address is usually
        invisible to most management applications. Utilities including
        remote access for diagnostics, file transfer of configurations
        and software, and similar applications cannot run without IP
        addresses as well as MAC addresses.</para>
    </important>
    <section xml:id="layer-2-arch-limitations">
      <title>Layer-2 architecture limitations</title>
    <para>Outside of the traditional data center the limitations of
        layer-2 network architectures become more obvious.</para>
    <itemizedlist>
        <listitem>
            <para>Number of VLANs is limited to 4096.</para>
        </listitem>
        <listitem>
            <para>The number of MACs stored in switch tables is
                limited.</para>
        </listitem>
        <listitem>
            <para>You must accommodate the need to maintain a set of
                layer-4 devices to handle traffic control.</para>
        </listitem>
        <listitem>
            <para>MLAG, often used for switch redundancy, is a
                proprietary solution that does not scale beyond two
                devices and forces vendor lock-in.</para>
        </listitem>
        <listitem>
            <para>It can be difficult to troubleshoot a network
                without IP addresses and ICMP.</para>
        </listitem>
        <listitem>
            <para>Configuring <glossterm
              baseform="Address Resolution Protocol (ARP)">ARP</glossterm>
              can be complicated on large layer-2 networks.</para>
        </listitem>
        <listitem>
            <para>All network devices need to be aware of all MACs,
                even instance MACs, so there is constant churn in MAC
                tables and network state changes as instances start and
                stop.</para>
        </listitem>
        <listitem>
            <para>Migrating MACs (instance migration) to different
                physical locations is a potential problem if you do not
                set ARP table timeouts properly.</para>
        </listitem>
    </itemizedlist>
    <para>It is important to know that layer 2 has a very limited set
        of network management tools. It is very difficult to control
        traffic, as it does not have mechanisms to manage the network
        or shape the traffic, and network troubleshooting is very
        difficult. One reason for this difficulty is that network devices
        have no IP addresses. As a result, there is no reasonable way
        to check network delay in a layer-2 network.</para>
    <para>On large layer-2 networks, configuring ARP learning can also
        be complicated. The setting for the MAC address timer on
        switches is critical and, if set incorrectly, can cause
        significant performance problems. As an example, the Cisco
        default MAC address timer is extremely long. Migrating MACs to
        different physical locations to support instance migration can
        be a significant problem. In this case, the network
        information maintained in the switches could be out of sync
        with the new location of the instance.</para>
    <para>In a layer-2 network, all devices are aware of all MACs,
        even those that belong to instances. The network state
        information in the backbone changes whenever an instance starts
        or stops. As a result there is far too much churn in
        the MAC tables on the backbone switches.</para>
    </section>
    <section xml:id="layer-3-arch-advantages">
      <title>Layer-3 architecture advantages</title>
    <para>In the layer 3 case, there is no churn in the routing tables
        due to instances starting and stopping. The only time there
        would be a routing state change is in the case of a Top
        of Rack (ToR) switch failure or a link failure in the backbone
        itself. Other advantages of using a layer-3 architecture
        include:</para>
    <itemizedlist>
        <listitem>
            <para>Layer-3 networks provide the same level of
                resiliency and scalability as the Internet.</para>
        </listitem>
        <listitem>
            <para>Controlling traffic with routing metrics is
                straightforward.</para>
        </listitem>
        <listitem>
            <para>You can configure layer 3 to use <glossterm
                baseform="Border Gateway Protocol (BGP)">BGP</glossterm>
                confederation for scalability so core routers have state
                proportional to the number of racks, not to the number of
                servers or instances.</para>
        </listitem>
        <listitem>
            <para>Routing takes instance MAC and IP addresses
                out of the network core, reducing state churn. Routing
                state changes only occur in the case of a ToR switch
                failure or backbone link failure.</para>
        </listitem>
        <listitem>
            <para>There are a variety of well tested tools, for
                example ICMP, to monitor and manage traffic.</para>
        </listitem>
        <listitem>
            <para>Layer-3 architectures enable the use of Quality
                of Service (QoS) to manage network performance.</para>
        </listitem>
    </itemizedlist>
    <section xml:id="layer-3-arch-limitations">
      <title>Layer-3 architecture limitations</title>
    <para>The main limitation of layer 3 is that there is no built-in
        isolation mechanism comparable to the VLANs in layer-2
        networks. Furthermore, the hierarchical nature of IP addresses
        means that an instance is on the same subnet as its
        physical host. This means that you cannot migrate it outside
        of the subnet easily. For these reasons, network
        virtualization needs to use IP <glossterm>encapsulation</glossterm>
        and software at the end hosts for isolation and the separation of
        the addressing in the virtual layer from the addressing in the
        physical layer. Other potential disadvantages of layer 3
        include the need to design an IP addressing scheme rather than
        relying on the switches to keep track of the MAC
        addresses automatically, and the need to configure the interior
        gateway routing protocol in the switches.</para>
    </section>
    </section>
    <section xml:id="network-recommendations-overview">
        <title>Network recommendations overview</title>
    <para>OpenStack has complex networking requirements for several
        reasons. Many components interact at different levels of the
        system stack, which adds complexity. Data flows are complex.
        Data in an OpenStack cloud moves both between instances across
        the network (also known as East-West), as well as in and out
        of the system (also known as North-South). Physical server
        nodes have network requirements that are independent of instance
        network requirements, which you must isolate from the core
        network to account for scalability. We recommend
        functionally separating the networks for security purposes and
        tuning performance through traffic shaping.</para>
    <para>You must consider a number of important general technical
        and business factors when planning and
        designing an OpenStack network. They include:</para>
    <itemizedlist>
        <listitem>
            <para>A requirement for vendor independence. To avoid
                hardware or software vendor lock-in, the design should
                not rely on specific features of a vendor's router or
                switch.</para>
        </listitem>
        <listitem>
            <para>A requirement to massively scale the ecosystem to
                support millions of end users.</para>
        </listitem>
        <listitem>
            <para>A requirement to support indeterminate platforms and
                applications.</para>
        </listitem>
        <listitem>
            <para>A requirement to design for cost efficient
                operations to take advantage of massive scale.</para>
        </listitem>
        <listitem>
            <para>A requirement to ensure that there is no single
                point of failure in the cloud ecosystem.</para>
        </listitem>
        <listitem>
            <para>A requirement for high availability architecture to
                meet customer SLA requirements.</para>
        </listitem>
        <listitem>
            <para>A requirement to be tolerant of rack level
                failure.</para>
        </listitem>
        <listitem>
            <para>A requirement to maximize flexibility to architect
                future production environments.</para>
        </listitem>
    </itemizedlist>
    <para>Bearing in mind these considerations, we recommend the following:</para>
    <itemizedlist>
        <listitem>
            <para>Layer-3 designs are preferable to layer-2
                architectures.</para>
        </listitem>
        <listitem>
            <para>Design a dense multi-path network core to support
                multi-directional scaling and flexibility.</para>
        </listitem>
        <listitem>
            <para>Use hierarchical addressing because it is the only
                viable option to scale the network ecosystem.</para>
        </listitem>
        <listitem>
            <para>Use virtual networking to isolate instance service
                network traffic from the management and internal
                network traffic.</para>
        </listitem>
        <listitem>
            <para>Isolate virtual networks using encapsulation
                technologies.</para>
        </listitem>
        <listitem>
            <para>Use traffic shaping for performance tuning.</para>
        </listitem>
        <listitem>
            <para>Use eBGP to connect to the Internet up-link.</para>
        </listitem>
        <listitem>
            <para>Use iBGP to flatten the internal traffic on the
                layer-3 mesh.</para>
        </listitem>
        <listitem>
            <para>Determine the most effective configuration for the block
                storage network.</para>
        </listitem>
    </itemizedlist></section>
    <section xml:id="additional-considerations-network-focus">
      <title>Additional considerations</title>
    <para>There are several further considerations when designing a
        network-focused OpenStack cloud.</para>
    <section xml:id="openstack-networking-versus-nova-network">
      <title>OpenStack Networking versus legacy networking (nova-network)
        considerations</title>
      <para>Selecting the type of networking technology to implement
        depends on many factors. OpenStack Networking (neutron) and
        legacy networking (nova-network) both have their advantages and
        disadvantages. They are both valid and supported options that fit
        different use cases:</para>
        <informaltable rules="all">
                <col width="40%" />
                <col width="60%" />
                <thead>
                    <tr><th>Legacy networking (nova-network)</th>
                        <th>OpenStack Networking</th></tr>
                </thead>
            <tbody>
                <tr>
                    <td>Simple, single agent</td>
                    <td>Complex, multiple agents</td>
                </tr>
                <tr>
                    <td>More mature, established</td>
                    <td>Newer, maturing</td>
                </tr>
                <tr>
                    <td>Flat or VLAN</td>
                    <td>Flat, VLAN, Overlays, L2-L3, SDN</td></tr>
                <tr>
                    <td>No plug-in support</td>
                    <td>Plug-in support for 3rd parties</td>
                </tr>
                <tr>
                    <td>Scales well</td>
                    <td>Scaling requires 3rd party plug-ins</td>
                </tr>
                <tr>
                    <td>No multi-tier topologies</td>
                    <td>Multi-tier topologies</td>
                </tr>
            </tbody>
        </informaltable>
    </section>
    <section xml:id="redundant-networking-tor-switch-ha">
      <title>Redundant networking: ToR switch high availability
        risk analysis</title>
    <para>A technical consideration of networking is the idea that
        you should install switching gear in a data center
        with backup switches in case of hardware failure.</para>
    <para>Research indicates the mean time between failures (MTBF) on switches
      is between 100,000 and 200,000 hours. This number is dependent
      on the ambient temperature of the switch in the data
      center. When properly cooled and maintained, this translates to
      between 11 and 22 years before failure. Even in the worst case
      of poor ventilation and high ambient temperatures in the data
      center, the MTBF is still 2-3 years. See <link
      xlink:href="http://www.garrettcom.com/techsupport/papers/ethernet_switch_reliability.pdf">http://www.garrettcom.com/techsupport/papers/ethernet_switch_reliability.pdf</link>
      for further information.</para>
    <para>In most cases, it is much more economical to use a
        single switch with a small pool of spare switches to replace
        failed units than it is to outfit an entire data center with
        redundant switches. Applications should tolerate rack level
        outages without affecting normal
        operations, since network and compute resources are easily
        provisioned and plentiful.</para>
    </section>
    <section xml:id="preparing-for-future-ipv6-support">
      <title>Preparing for the future: IPv6 support</title>
      <para>One of the most important networking topics today is the
        impending exhaustion of IPv4 addresses. In early 2014, ICANN
        announced that they started allocating the final IPv4 address
        blocks to the Regional Internet Registries (<link
        xlink:href="http://www.internetsociety.org/deploy360/blog/2014/05/goodbye-ipv4-iana-starts-allocating-final-address-blocks/">http://www.internetsociety.org/deploy360/blog/2014/05/goodbye-ipv4-iana-starts-allocating-final-address-blocks/</link>).
        This means the IPv4 address space is close to being fully
        allocated. As a result, it will soon become difficult to
        allocate more IPv4 addresses to an application that has
        experienced growth, or that you expect to scale out, due to the lack
        of unallocated IPv4 address blocks.</para>
      <para>For network focused applications the future is the IPv6
        protocol. IPv6 increases the address space significantly,
        fixes long standing issues in the IPv4 protocol, and will
        become essential for network focused applications in the
        future.</para>
      <para>OpenStack Networking supports IPv6 when configured to take
        advantage of it. To enable IPv6, create an IPv6 subnet in
        Networking and use IPv6 prefixes when creating security
        groups.</para></section>
    <section xml:id="asymmetric-links">
      <title>Asymmetric links</title>
      <para>When designing a network architecture, the traffic patterns
        of an application heavily influence the allocation of
        total bandwidth and the number of links that you use to send
        and receive traffic. Applications that provide file storage
        for customers allocate bandwidth and links to favor
        incoming traffic, whereas video streaming applications
        allocate bandwidth and links to favor outgoing traffic.</para>
    </section>
    <section xml:id="performance-network-focus">
      <title>Performance</title>
      <para>It is important to analyze the applications' tolerance for
        latency and jitter when designing an environment to support
        network focused applications. Certain applications, for
        example VoIP, are less tolerant of latency and jitter. Where
        latency and jitter are concerned, certain applications may
        require tuning of QoS parameters and network device queues to
        ensure that their traffic is queued for immediate transmission
        or guaranteed a minimum bandwidth. Since OpenStack currently does
        not support these functions, consider carefully your selected
        network plug-in.</para>
      <para>The location of a service may also impact the application or
        consumer experience. If an application serves
        differing content to different users it must properly direct
        connections to those specific locations. Where appropriate,
        use a multi-site installation for these situations.</para>
      <para>You can implement networking in two separate
        ways. Legacy networking (nova-network) provides a flat DHCP network
        with a single broadcast domain. This implementation does not
        support tenant isolation networks or advanced plug-ins, but it
        is currently the only way to implement a distributed layer-3
        agent using the multi_host configuration.
        OpenStack Networking (neutron) is the official networking implementation
        and provides a pluggable architecture that supports a large
        variety of network methods. Some of these include a layer-2
        only provider network model, external device plug-ins, or even
        OpenFlow controllers.</para>
      <para>Networking at large scales becomes a set of boundary
        questions. The determination of how large a layer-2 domain
        must be is based on the number of nodes within the domain
        and the amount of broadcast traffic that passes between
        instances. Breaking layer-2 boundaries may require the
        implementation of overlay networks and tunnels. This decision
        is a balancing act between the need for a smaller overhead or
        a need for a smaller domain.</para>
      <para>When selecting network devices, be aware that making this
        decision based on the greatest port density often comes with a
        drawback. Aggregation switches and routers have not all kept
        pace with Top of Rack switches and may induce bottlenecks on
        north-south traffic. As a result, it may be possible for
        massive amounts of downstream network utilization to impact
        upstream network devices, impacting service to the cloud.
        Since OpenStack does not currently provide a mechanism for
        traffic shaping or rate limiting, it is necessary to implement
        these features at the network hardware level.</para>
      </section>
    </section>
 </section>
--- a/doc/arch-design/network_focus/section_user_requirements_network_focus.xml
+++ b/doc/arch-design/network_focus/section_user_requirements_network_focus.xml
@ -1,104 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <section xmlns="http://docbook.org/ns/docbook"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  version="5.0"
  xml:id="user-requirements-network-focus">
    <?dbhtml stop-chunking?>
    <title>User requirements</title>
    <para>Network-focused architectures vary from the general-purpose
      architecture designs. Certain network-intensive applications influence
      these architectures. Some of the business requirements that influence
      the design include:</para>
    <itemizedlist>
        <listitem>
            <para>Network latency through slow page loads, degraded video
              streams, and low quality VoIP sessions impacts the user
              experience. Users are often not aware of how network design and
              architecture affect their experiences. Both enterprise customers
              and end-users rely on the network for delivery of an application.
              Network performance problems can result in a negative experience
              for the end-user, as well as productivity and economic loss.
           </para>
        </listitem>
    </itemizedlist>
    <section xml:id="high-availability-issues-network-focus">
      <title>High availability issues</title>
      <para>Depending on the application and use case, network-intensive
        OpenStack installations can have high availability requirements.
        Financial transaction systems have a much higher requirement for high
        availability than a development application. Use network availability
        technologies, for example quality of service (QoS), to improve the
        network performance of sensitive applications such as VoIP and video
        streaming.</para>
      <para>High performance systems have SLA requirements for a minimum
        QoS with regard to guaranteed uptime, latency, and bandwidth. The level
        of the SLA can have a significant impact on the network architecture and
        requirements for redundancy in the systems.</para>
    </section>
    <section xml:id="risks-network-focus">
      <title>Risks</title>
      <variablelist>
        <varlistentry>
          <term>Network misconfigurations</term>
          <listitem>
            <para>Configuring incorrect IP addresses, VLANs, and routers
              can cause outages to areas of the network or, in the worst-case
              scenario, the entire cloud infrastructure. Automate network
              configurations to minimize the opportunity for operator error
              as it can cause disruptive problems.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Capacity planning</term>
          <listitem>
            <para>Cloud networks require management for capacity and growth
              over time. Capacity planning includes the purchase of network
              circuits and hardware that can potentially have lead times
              measured in months or years.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Network tuning</term>
          <listitem>
            <para>Configure cloud networks to minimize link loss, packet loss,
              packet storms, broadcast storms, and loops.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Single Point Of Failure (SPOF)</term>
          <listitem>
            <para>Consider high availability at the physical and environmental
              layers. If there is a single point of failure due to only one
              upstream link, or only one power supply, an outage can become
              unavoidable.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Complexity</term>
          <listitem>
            <para>An overly complex network design can be difficult to
              maintain and troubleshoot. While device-level configuration
              can ease maintenance concerns and automated tools can handle
              overlay networks, avoid or document non-traditional interconnects
              between functions and specialized hardware to prevent
              outages.</para>
          </listitem>
        </varlistentry>
        <varlistentry>
          <term>Non-standard features</term>
          <listitem>
            <para>There are additional risks that arise from configuring the
              cloud network to take advantage of vendor specific features.
              One example is multi-chassis link aggregation (MLAG) used to provide
              redundancy at the aggregator switch level of the network. MLAG
              is not a standard and, as a result, each vendor has their own
              proprietary implementation of the feature. MLAG architectures
              are not interoperable across switch vendors, which leads to
              vendor lock-in and can delay or prevent upgrades of
              components.</para>
          </listitem>
        </varlistentry>
      </variablelist>
    </section>
 </section>
--- a/doc/arch-design/pom.xml
+++ b/doc/arch-design/pom.xml
@ -1,83 +0,0 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <!-- Maven build descriptor for the OpenStack Architecture Design Guide.
         It runs the clouddocs-maven-plugin over bk-openstack-arch-design.xml
         to produce the published output. -->
    <!-- Inherit from the parent POM one directory up (../pom.xml). -->
    <parent>
        <groupId>org.openstack.docs</groupId>
        <artifactId>parent-pom</artifactId>
        <version>1.0.0-SNAPSHOT</version>
        <relativePath>../pom.xml</relativePath>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>openstack-arch-design</artifactId>
    <packaging>jar</packaging>
    <name>OpenStack Architecture Design Guide</name>
    <properties>
        <!-- This is set by Jenkins according to the branch. -->
        <release.path.name></release.path.name>
        <!-- NOTE(review): 0 presumably turns reader comments off; confirm
             where this property is consumed. -->
        <comments.enabled>0</comments.enabled>
    </properties>
    <!-- ################################################ -->
    <!-- USE "mvn clean generate-sources" to run this POM -->
    <!-- ################################################ -->
    <build>
        <plugins>
            <plugin>
                <groupId>com.rackspace.cloud.api</groupId>
                <artifactId>clouddocs-maven-plugin</artifactId>
                <!-- version set in ../pom.xml -->
                <executions>
                    <!-- Single execution: the generate-webhelp goal is bound
                         to the generate-sources phase, which is why the
                         command above stops at generate-sources. -->
                    <execution>
                        <id>generate-webhelp</id>
                        <goals>
                            <goal>generate-webhelp</goal>
                        </goals>
                        <phase>generate-sources</phase>
                        <configuration>
                            <!-- These parameters only apply to webhelp -->
                            <!-- Disqus is set to 0 and Google Analytics to 1;
                                 the shortname and analytics ID are still
                                 supplied for both. -->
                            <enableDisqus>0</enableDisqus>
                            <disqusShortname>openstack-arch-design</disqusShortname>
                            <enableGoogleAnalytics>1</enableGoogleAnalytics>
                            <googleAnalyticsId>UA-17511903-1</googleAnalyticsId>
                            <!-- Each line below pairs an element name with
                                 the items to generate for it.
                                 NOTE(review): this looks like the DocBook XSL
                                 generate.toc format; confirm against the
                                 plugin documentation before editing. -->
                            <generateToc>
                                appendix  toc,title
                                article/appendix  nop
                                article   toc,title
                                book      toc,title,figure,table,example,equation
                                chapter   toc,title
                                section   toc
                                part      toc,title
                                qandadiv  toc
                                qandaset  toc
                                reference toc,title
                                set       toc,title
                            </generateToc>
                            <!-- The following elements set section autonumbering in the output: chapter numbers, but no numbered sections. -->
                            <sectionAutolabel>0</sectionAutolabel>
                            <tocSectionDepth>1</tocSectionDepth>
                            <sectionLabelIncludesComponentLabel>0</sectionLabelIncludesComponentLabel>
                            <!-- Output names: webhelp directory and PDF file
                                 base name are both "arch-design". -->
                            <webhelpDirname>arch-design</webhelpDirname>
                            <pdfFilenameBase>arch-design</pdfFilenameBase>
                            <!-- NOTE(review): page size, double-sided layout
                                 and cover omission presumably affect the PDF
                                 rendering even though they sit in the webhelp
                                 execution; confirm. -->
                            <pageWidth>7.44in</pageWidth>
                            <pageHeight>9.68in</pageHeight>
                            <doubleSided>1</doubleSided>
                            <omitCover>1</omitCover>
                        </configuration>
                    </execution>
                </executions>
                <configuration>
                    <!-- These parameters apply to pdf and webhelp -->
                    <!-- The book file in this directory is the only source
                         listed; XInclude support is enabled for it. -->
                    <xincludeSupported>true</xincludeSupported>
                    <sourceDirectory>.</sourceDirectory>
                    <includes>
                        bk-openstack-arch-design.xml
                    </includes>
                    <canonicalUrlBase>http://docs.openstack.org/openstack-arch-design/content</canonicalUrlBase>
                    <!-- Glossary terms come from the glossary directory that
                         is a sibling of this guide's directory. -->
                    <glossaryCollection>${basedir}/../glossary/glossary-terms.xml</glossaryCollection>
                    <branding>openstack</branding>
                    <formalProcedures>0</formalProcedures>
                </configuration>
            </plugin>
        </plugins>
    </build>
 </project>
--- a/doc/arch-design-rst/setup.cfg
+++ b/doc/arch-design-rst/setup.cfg
--- a/doc/arch-design-rst/setup.py
+++ b/doc/arch-design-rst/setup.py
--- a/doc/arch-design-rst/source/common
+++ b/doc/arch-design-rst/source/common
--- a/doc/arch-design-rst/source/compute-focus-architecture.rst
+++ b/doc/arch-design-rst/source/compute-focus-architecture.rst
--- a/doc/arch-design-rst/source/compute-focus-operational-considerations.rst
+++ b/doc/arch-design-rst/source/compute-focus-operational-considerations.rst
--- a/Show More
+++ b/Show More