Merge "[arch-design] Move RST guide to arch-design folder"
@ -35,6 +35,11 @@ Virtual Machine Image Guide
|
|||||||
|
|
||||||
* RST conversion finished.
|
* RST conversion finished.
|
||||||
|
|
||||||
|
Architecture Design Guide
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
* Completed RST conversion.
|
||||||
|
|
||||||
Translations
|
Translations
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
@ -30,9 +30,9 @@ declare -A SPECIAL_BOOKS=(
|
|||||||
["networking-guide"]="RST"
|
["networking-guide"]="RST"
|
||||||
["user-guide"]="RST"
|
["user-guide"]="RST"
|
||||||
["user-guide-admin"]="RST"
|
["user-guide-admin"]="RST"
|
||||||
|
["arch-design"]="RST"
|
||||||
# Skip in-progress guides
|
# Skip in-progress guides
|
||||||
["contributor-guide"]="skip"
|
["contributor-guide"]="skip"
|
||||||
["arch-design-rst"]="skip"
|
|
||||||
["config-ref-rst"]="skip"
|
["config-ref-rst"]="skip"
|
||||||
# This needs special handling, handle it with the RST tools.
|
# This needs special handling, handle it with the RST tools.
|
||||||
["common-rst"]="RST"
|
["common-rst"]="RST"
|
||||||
|
@ -1,64 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<book xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="openstack-arch-design">
|
|
||||||
<title>OpenStack Architecture Design Guide</title>
|
|
||||||
<?rax title.font.size="28px" subtitle.font.size="28px"?>
|
|
||||||
<titleabbrev>Architecture Guide</titleabbrev>
|
|
||||||
<info>
|
|
||||||
<author>
|
|
||||||
<personname>
|
|
||||||
<firstname/>
|
|
||||||
<surname/>
|
|
||||||
</personname>
|
|
||||||
<affiliation>
|
|
||||||
<orgname>OpenStack Foundation</orgname>
|
|
||||||
</affiliation>
|
|
||||||
</author>
|
|
||||||
<copyright>
|
|
||||||
<year>2014</year>
|
|
||||||
<year>2015</year>
|
|
||||||
<holder>OpenStack Foundation</holder>
|
|
||||||
</copyright>
|
|
||||||
<releaseinfo>current</releaseinfo>
|
|
||||||
<productname>OpenStack</productname>
|
|
||||||
<pubdate/>
|
|
||||||
<legalnotice role="apache2">
|
|
||||||
<annotation>
|
|
||||||
<remark>Copyright details are filled in by the
|
|
||||||
template.</remark>
|
|
||||||
</annotation>
|
|
||||||
</legalnotice>
|
|
||||||
<legalnotice role="cc-by">
|
|
||||||
<annotation>
|
|
||||||
<remark>Remaining licensing details are filled in by
|
|
||||||
the template.</remark>
|
|
||||||
</annotation>
|
|
||||||
</legalnotice>
|
|
||||||
<abstract>
|
|
||||||
<para>To reap the benefits of OpenStack, you should
|
|
||||||
plan, design, and architect your cloud properly,
|
|
||||||
taking users' needs into account and understanding the
|
|
||||||
use cases.</para>
|
|
||||||
</abstract>
|
|
||||||
</info>
|
|
||||||
<!-- Chapters are referred from the book file through these
|
|
||||||
include statements. You can add additional chapters using
|
|
||||||
these types of statements. -->
|
|
||||||
<xi:include href="../common/ch_preface.xml"/>
|
|
||||||
<xi:include href="ch_introduction.xml"/>
|
|
||||||
<xi:include href="ch_legal-security-requirements.xml"/>
|
|
||||||
<xi:include href="ch_generalpurpose.xml"/>
|
|
||||||
<xi:include href="ch_compute_focus.xml"/>
|
|
||||||
<xi:include href="ch_storage_focus.xml"/>
|
|
||||||
<xi:include href="ch_network_focus.xml"/>
|
|
||||||
<xi:include href="ch_multi_site.xml"/>
|
|
||||||
<xi:include href="ch_hybrid.xml"/>
|
|
||||||
<xi:include href="ch_massively_scalable.xml"/>
|
|
||||||
<xi:include href="ch_specialized.xml"/>
|
|
||||||
<xi:include href="ch_references.xml"/>
|
|
||||||
<xi:include href="../common/app_support.xml"/>
|
|
||||||
<glossary role="auto"/>
|
|
||||||
</book>
|
|
@ -1,45 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="compute_focus">
|
|
||||||
<title>Compute focused</title>
|
|
||||||
<para>Compute-focused clouds are a specialized subset of the general purpose
|
|
||||||
OpenStack cloud architecture. A compute-focused cloud specifically supports
|
|
||||||
compute intensive workloads.</para>
|
|
||||||
<note>
|
|
||||||
<para>Compute intensive workloads may be CPU intensive, RAM intensive,
|
|
||||||
or both; they are not typically storage or network intensive.</para>
|
|
||||||
</note>
|
|
||||||
<para>Compute-focused workloads may include the following use cases:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>High performance computing (HPC)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Big data analytics using Hadoop or other distributed data
|
|
||||||
stores</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Continuous integration/continuous deployment (CI/CD)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Platform-as-a-Service (PaaS)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Signal processing for network function virtualization (NFV)</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<note>
|
|
||||||
<para>A compute-focused OpenStack cloud does not typically use raw block storage
|
|
||||||
services as it does not host applications that require
|
|
||||||
persistent block storage.</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<xi:include href="compute_focus/section_tech_considerations_compute_focus.xml"/>
|
|
||||||
<xi:include href="compute_focus/section_operational_considerations_compute_focus.xml"/>
|
|
||||||
<xi:include href="compute_focus/section_architecture_compute_focus.xml"/>
|
|
||||||
<xi:include href="compute_focus/section_prescriptive_examples_compute_focus.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,95 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="generalpurpose">
|
|
||||||
<title>General purpose</title>
|
|
||||||
<para>OpenStack general purpose clouds are often considered a
|
|
||||||
starting point for building a cloud deployment. They are designed
|
|
||||||
to balance the components and do not emphasize any particular aspect
|
|
||||||
of the overall computing environment.
|
|
||||||
Cloud design must give equal weight to the compute, network, and
|
|
||||||
storage components. General purpose clouds are
|
|
||||||
found in private, public, and hybrid environments, lending
|
|
||||||
themselves to many different use cases.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
General purpose clouds are homogeneous deployments. They are
|
|
||||||
not suited to specialized environments or edge case situations.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
Common uses of a general purpose cloud include:
|
|
||||||
</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Providing a simple database
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
A web application runtime environment
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
A shared application development platform
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Lab test bed
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Use cases that benefit from scale-out rather than scale-up approaches
|
|
||||||
are good candidates for general purpose cloud architecture.
|
|
||||||
</para>
|
|
||||||
<para>A general purpose cloud is designed to have a range of potential
|
|
||||||
uses or functions; it is not specialized for specific use cases. General
|
|
||||||
purpose architecture is designed to address 80% of potential use
|
|
||||||
cases available. The infrastructure, in itself, is a specific use case,
|
|
||||||
enabling it to be used as a base model for the design process.
|
|
||||||
General purpose clouds are designed to be platforms that are suited
|
|
||||||
for general purpose applications.</para>
|
|
||||||
<para>General purpose clouds are limited to the most basic
|
|
||||||
components, but they can include additional resources such
|
|
||||||
as:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Virtual-machine disk image library</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Raw block storage</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>File or object storage</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Firewalls</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Load balancers</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>IP addresses</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Network overlays or virtual local area networks
|
|
||||||
(VLANs)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Software bundles</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<xi:include href="generalpurpose/section_user_requirements_general_purpose.xml"/>
|
|
||||||
<xi:include href="generalpurpose/section_tech_considerations_general_purpose.xml"/>
|
|
||||||
<xi:include href="generalpurpose/section_operational_considerations_general_purpose.xml"/>
|
|
||||||
<xi:include href="generalpurpose/section_architecture_general_purpose.xml"/>
|
|
||||||
<xi:include href="generalpurpose/section_prescriptive_example_general_purpose.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,59 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="hybrid">
|
|
||||||
<title>Hybrid</title>
|
|
||||||
<para>A <glossterm baseform="hybrid cloud">hybrid cloud</glossterm> design
|
|
||||||
is one that uses more than one cloud. For example, designs that use
|
|
||||||
both an OpenStack-based private cloud and an OpenStack-based public
|
|
||||||
cloud, or that use an OpenStack cloud and a non-OpenStack cloud,
|
|
||||||
are hybrid clouds.</para>
|
|
||||||
<para><glossterm baseform="bursting">Bursting</glossterm> describes the
|
|
||||||
practice of creating new instances in an external cloud to alleviate
|
|
||||||
capacity issues in a private cloud.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<title>Example scenarios suited to hybrid clouds</title>
|
|
||||||
<listitem>
|
|
||||||
<para>Bursting from a private cloud to a public
|
|
||||||
cloud</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Disaster recovery</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Development and testing</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Federated cloud, enabling users to choose resources
|
|
||||||
from multiple providers</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Supporting legacy systems as they transition to the
|
|
||||||
cloud</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Hybrid clouds interact with systems that are outside
|
|
||||||
the control of the private cloud administrator, and require careful
|
|
||||||
architecture to prevent conflicts with hardware, software,
|
|
||||||
and APIs under external control.</para>
|
|
||||||
<para>The degree to which the architecture is OpenStack-based
|
|
||||||
affects your ability to accomplish tasks with native
|
|
||||||
OpenStack tools. By definition, this is a situation in which
|
|
||||||
no single cloud can provide all of the necessary
|
|
||||||
functionality. In order to manage the entire system, we recommend
|
|
||||||
using a cloud management platform (CMP).</para>
|
|
||||||
<para>There are several commercial and open source CMPs available,
|
|
||||||
but there is no single CMP that can address all needs in all scenarios,
|
|
||||||
and sometimes a manually-built solution is the best option.
|
|
||||||
This chapter includes discussion of using CMPs for managing a hybrid
|
|
||||||
cloud.</para>
|
|
||||||
|
|
||||||
<xi:include href="hybrid/section_user_requirements_hybrid.xml"/>
|
|
||||||
<xi:include href="hybrid/section_tech_considerations_hybrid.xml"/>
|
|
||||||
<xi:include href="hybrid/section_operational_considerations_hybrid.xml"/>
|
|
||||||
<xi:include href="hybrid/section_architecture_hybrid.xml"/>
|
|
||||||
<xi:include href="hybrid/section_prescriptive_examples_hybrid.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,18 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="introduction">
|
|
||||||
<title>Introduction</title>
|
|
||||||
|
|
||||||
<para><glossterm>OpenStack</glossterm> is a fully-featured, self-service
|
|
||||||
cloud. This book takes you through some of the considerations you have to make
|
|
||||||
when designing your cloud.</para>
|
|
||||||
|
|
||||||
<xi:include href="introduction/section_intended_audience.xml"/>
|
|
||||||
<xi:include href="introduction/section_how_this_book_is_organized.xml"/>
|
|
||||||
<xi:include href="introduction/section_how_this_book_was_written.xml"/>
|
|
||||||
<xi:include href="introduction/section_methodology.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,260 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="security-legal-requirements">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Security and legal requirements</title>
|
|
||||||
<para>This chapter discusses the legal and security requirements you
|
|
||||||
need to consider for the different OpenStack scenarios.</para>
|
|
||||||
<section xml:id="legal-requirements">
|
|
||||||
<title>Legal requirements</title>
|
|
||||||
<para>Many jurisdictions have legislative and regulatory
|
|
||||||
requirements governing the storage and management of data in
|
|
||||||
cloud environments. Common areas of regulation include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Data retention policies ensuring storage of
|
|
||||||
persistent data and records management to meet data
|
|
||||||
archival requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Data ownership policies governing the possession and
|
|
||||||
responsibility for data.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Data sovereignty policies governing the storage of
|
|
||||||
data in foreign countries or otherwise separate
|
|
||||||
jurisdictions.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Data compliance policies governing certain types of
|
|
||||||
information needing to reside in certain locations due to
|
|
||||||
regulatory issues - and more importantly, cannot reside in
|
|
||||||
other locations for the same reason.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Examples of such legal frameworks include the <link
|
|
||||||
xlink:href="http://ec.europa.eu/justice/data-protection/">data
|
|
||||||
protection framework</link> of the European Union and the
|
|
||||||
requirements of the <link
|
|
||||||
xlink:href="http://www.finra.org/Industry/Regulation/FINRARules/">
|
|
||||||
Financial Industry Regulatory Authority</link> in the United
|
|
||||||
States. Consult a local regulatory body for more information.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="security-overview">
|
|
||||||
<title>Security</title>
|
|
||||||
<para>When deploying OpenStack in an enterprise as a private
|
|
||||||
cloud, despite activating a firewall and binding
|
|
||||||
employees with security agreements, cloud architecture
|
|
||||||
should not make assumptions about safety and protection.
|
|
||||||
In addition to considering the users, operators, or administrators
|
|
||||||
who will use the environment, consider also negative or hostile users who
|
|
||||||
would attack or compromise the security of your deployment regardless
|
|
||||||
of firewalls or security agreements.</para>
|
|
||||||
<para>Attack vectors increase further in a public facing OpenStack
|
|
||||||
deployment. For example, the API endpoints and the
|
|
||||||
software behind them become vulnerable to hostile
|
|
||||||
entities attempting to gain unauthorized access or prevent access
|
|
||||||
to services. This can result in loss of reputation and you must
|
|
||||||
protect against it through auditing and appropriate
|
|
||||||
filtering.</para>
|
|
||||||
<para>It is important to understand that user authentication
|
|
||||||
requests encase sensitive information such as user names,
|
|
||||||
passwords, and authentication tokens. For this reason, place
|
|
||||||
the API services behind hardware that performs SSL termination.</para>
|
|
||||||
<warning>
|
|
||||||
<para>Be mindful of consistency when utilizing third party
|
|
||||||
clouds to explore authentication options.</para>
|
|
||||||
</warning>
|
|
||||||
</section>
|
|
||||||
<section xml:id="security-domains">
|
|
||||||
<title>Security domains</title>
|
|
||||||
<para>A security domain comprises users, applications, servers or
|
|
||||||
networks that share common trust requirements and expectations
|
|
||||||
within a system. Typically, security domains have the same
|
|
||||||
authentication and authorization requirements and users.</para>
|
|
||||||
<para>You can map security domains individually to the
|
|
||||||
installation, or combine them. For example, some
|
|
||||||
deployment topologies combine both guest and data domains onto
|
|
||||||
one physical network. In other cases these networks
|
|
||||||
are physically separate. Map out the security domains against
|
|
||||||
specific OpenStack topology needs. The domains and their trust requirements
|
|
||||||
depend on whether the cloud instance is public, private, or
|
|
||||||
hybrid.</para>
|
|
||||||
<simplesect>
|
|
||||||
<title>Public security domains</title>
|
|
||||||
<para>The public security domain is an untrusted area of
|
|
||||||
the cloud infrastructure. It can refer to the internet as a
|
|
||||||
whole or simply to networks over which the user has no
|
|
||||||
authority. Always consider this domain untrusted. For example,
|
|
||||||
in a hybrid cloud deployment, any information traversing
|
|
||||||
between and beyond the clouds is in the public domain and
|
|
||||||
untrustworthy.</para>
|
|
||||||
</simplesect>
|
|
||||||
<simplesect>
|
|
||||||
<title>Guest security domains</title>
|
|
||||||
<para>Typically used for compute instance-to-instance traffic, the
|
|
||||||
guest security domain handles compute data generated by
|
|
||||||
instances on the cloud but not services that support the
|
|
||||||
operation of the cloud, such as API calls. Public cloud
|
|
||||||
providers and private cloud providers who do not have
|
|
||||||
stringent controls on instance use or who allow unrestricted
|
|
||||||
internet access to instances should consider this domain to be
|
|
||||||
untrusted. Private cloud providers may want to consider this
|
|
||||||
network as internal and therefore trusted only if they have
|
|
||||||
controls in place to assert that they trust instances and all
|
|
||||||
their tenants.</para>
|
|
||||||
</simplesect>
|
|
||||||
<simplesect>
|
|
||||||
<title>Management security domains</title>
|
|
||||||
<para>The management security domain is where services interact.
|
|
||||||
The networks in this domain transport confidential data such as configuration
|
|
||||||
parameters, user names, and passwords. Trust this domain when it is
|
|
||||||
behind an organization's firewall in deployments.</para>
|
|
||||||
</simplesect>
|
|
||||||
<simplesect>
|
|
||||||
<title>Data security domains</title>
|
|
||||||
<para>The data security domain is concerned primarily with
|
|
||||||
information pertaining to the storage services within
|
|
||||||
OpenStack. The data that crosses this network has integrity and
|
|
||||||
confidentiality requirements. Depending on the type of deployment there
|
|
||||||
may also be availability requirements. The trust level of this network
|
|
||||||
is heavily dependent on deployment decisions and does not have a default
|
|
||||||
level of trust.</para>
|
|
||||||
</simplesect>
|
|
||||||
</section>
|
|
||||||
<section xml:id="hypervisor-security">
|
|
||||||
<title>Hypervisor security</title>
|
|
||||||
<para>The hypervisor also requires a security assessment. In a
|
|
||||||
public cloud, organizations typically do not have control
|
|
||||||
over the choice of hypervisor. Properly securing your
|
|
||||||
hypervisor is important. Attacks made upon the
|
|
||||||
unsecured hypervisor are called a
|
|
||||||
<firstterm>hypervisor breakout</firstterm>.
|
|
||||||
Hypervisor breakout describes the event of a
|
|
||||||
compromised or malicious instance breaking out of the resource
|
|
||||||
controls of the hypervisor and gaining access to the bare
|
|
||||||
metal operating system and hardware resources.</para>
|
|
||||||
<para>This is not an issue if the security of instances is not important.
|
|
||||||
However, enterprises need to avoid vulnerability. The only way to
|
|
||||||
do this is to avoid the situation where the instances are running
|
|
||||||
on a public cloud. That does not mean that there is a
|
|
||||||
need to own all of the infrastructure on which an OpenStack
|
|
||||||
installation operates; it suggests avoiding situations in which
|
|
||||||
sharing hardware with others occurs.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="security-baremetal">
|
|
||||||
<title>Bare metal security</title>
|
|
||||||
<para>There are other services worth considering that provide a
|
|
||||||
bare metal instance instead of a cloud. In other cases, it is
|
|
||||||
possible to replicate a second private cloud by integrating
|
|
||||||
with a private Cloud-as-a-Service deployment. The
|
|
||||||
organization does not buy the hardware, but also does not share
|
|
||||||
with other tenants. It is also possible to use a provider that
|
|
||||||
hosts a bare-metal public cloud instance for which the
|
|
||||||
hardware is dedicated only to one customer, or a provider that
|
|
||||||
offers private Cloud-as-a-Service.</para>
|
|
||||||
<important>
|
|
||||||
<para>Each cloud implements services differently.
|
|
||||||
What keeps data secure in one
|
|
||||||
cloud may not do the same in another. Be sure to know the
|
|
||||||
security requirements of every cloud that handles the
|
|
||||||
organization's data or workloads.</para>
|
|
||||||
</important>
|
|
||||||
<para>More information on OpenStack Security can be found in the
|
|
||||||
<link xlink:href="http://docs.openstack.org/security-guide"><citetitle>OpenStack
|
|
||||||
Security Guide</citetitle></link>.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="networking-security">
|
|
||||||
<title>Networking security</title>
|
|
||||||
<para>Consider security implications and requirements before designing the
|
|
||||||
physical and logical network topologies. Make sure that the networks are
|
|
||||||
properly segregated and traffic flows are going to the correct
|
|
||||||
destinations without crossing through locations that are undesirable.
|
|
||||||
Consider the following example factors:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Firewalls</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Overlay interconnects for joining separated tenant networks</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Routing through or avoiding specific networks</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>How networks attach to hypervisors can expose security
|
|
||||||
vulnerabilities. To mitigate the risk of hypervisor breakouts,
|
|
||||||
separate networks from other systems and schedule instances for the
|
|
||||||
network onto dedicated compute nodes. This prevents attackers
|
|
||||||
from having access to the networks from a compromised instance.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="security-multi-site">
|
|
||||||
<title>Multi-site security</title>
|
|
||||||
<para>Securing a multi-site OpenStack installation brings
|
|
||||||
extra challenges. Tenants may expect a tenant-created network
|
|
||||||
to be secure. In a multi-site installation the use of a
|
|
||||||
non-private connection between sites may be required. This may
|
|
||||||
mean that traffic would be visible to third parties and, in
|
|
||||||
cases where an application requires security, this issue
|
|
||||||
requires mitigation. In these instances, install a VPN or
|
|
||||||
encrypted connection between sites to conceal sensitive traffic.</para>
|
|
||||||
<para>Another security consideration with regard to multi-site
|
|
||||||
deployments is Identity. Centralize authentication within a
|
|
||||||
multi-site deployment. Centralization provides a
|
|
||||||
single authentication point for users across the deployment,
|
|
||||||
as well as a single point of administration for traditional
|
|
||||||
create, read, update, and delete operations. Centralized
|
|
||||||
authentication is also useful for auditing purposes because
|
|
||||||
all authentication tokens originate from the same
|
|
||||||
source.</para>
|
|
||||||
<para>Just as tenants in a single-site deployment need isolation
|
|
||||||
from each other, so do tenants in multi-site installations.
|
|
||||||
The extra challenges in multi-site designs revolve around
|
|
||||||
ensuring that tenant networks function across regions.
|
|
||||||
OpenStack Networking (neutron) does not presently support
|
|
||||||
a mechanism to provide this functionality; therefore, an
|
|
||||||
external system may be necessary to manage these mappings.
|
|
||||||
Tenant networks may contain sensitive information requiring
|
|
||||||
that this mapping be accurate and consistent to ensure that a
|
|
||||||
tenant in one site does not connect to a different tenant in
|
|
||||||
another site.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="openstack-components-multi-site">
|
|
||||||
<title>OpenStack components</title>
|
|
||||||
<para>Most OpenStack installations require a bare minimum set of
|
|
||||||
pieces to function. These include OpenStack Identity
|
|
||||||
(keystone) for authentication, OpenStack Compute
|
|
||||||
(nova) for compute, OpenStack Image service (glance) for image
|
|
||||||
storage, OpenStack Networking (neutron) for networking, and
|
|
||||||
potentially an object store in the form of OpenStack Object
|
|
||||||
Storage (swift). Bringing multi-site into play also demands extra
|
|
||||||
components in order to coordinate between regions. A centralized
|
|
||||||
Identity service is necessary to provide the single authentication
|
|
||||||
point. A centralized dashboard is also recommended to provide a
|
|
||||||
single login point and a mapped experience to the API and CLI
|
|
||||||
options available. If needed, use a centralized Object Storage service,
|
|
||||||
installing the required swift proxy service alongside the Object
|
|
||||||
Storage service.</para>
|
|
||||||
<para>It may also be helpful to install a few extra options in
|
|
||||||
order to facilitate certain use cases. For instance,
|
|
||||||
installing a DNS service may assist in automatically generating
|
|
||||||
DNS domains for each region with an automatically-populated
|
|
||||||
zone full of resource records for each instance. This
|
|
||||||
facilitates using DNS as a mechanism for determining which
|
|
||||||
region would be selected for certain applications.</para>
|
|
||||||
<para>Another useful tool for managing a multi-site installation
|
|
||||||
is Orchestration (heat). The Orchestration service
|
|
||||||
allows the use of templates to define a set of instances to
|
|
||||||
be launched together or for scaling existing sets. It can
|
|
||||||
set up matching or differentiated groupings based on
|
|
||||||
regions. For instance, if an application requires an equally
|
|
||||||
balanced number of nodes across sites, the same heat template
|
|
||||||
can be used to cover each site with small alterations to only
|
|
||||||
the region name.</para>
|
|
||||||
</section>
|
|
||||||
</chapter>
|
|
||||||
|
|
@ -1,79 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="massively_scalable">
|
|
||||||
<title>Massively scalable</title>
|
|
||||||
|
|
||||||
<para>A massively scalable architecture is a cloud
|
|
||||||
implementation that is either a very large deployment, such as
|
|
||||||
a commercial service provider might build, or
|
|
||||||
one that has the capability to support user requests for large
|
|
||||||
amounts of cloud resources.</para>
|
|
||||||
<para>An example is an infrastructure in which requests to service
|
|
||||||
500 or more instances at a time are common. A massively scalable
|
|
||||||
infrastructure fulfills such a request without exhausting the
|
|
||||||
available cloud infrastructure resources. While the high capital
|
|
||||||
cost of implementing such a cloud architecture means that it
|
|
||||||
is currently in limited use, many organizations are planning
|
|
||||||
for massive scalability in the future.</para>
|
|
||||||
<para>A massively scalable OpenStack cloud design presents a
|
|
||||||
unique set of challenges and considerations. For the most part
|
|
||||||
it is similar to a general purpose cloud architecture, as it
|
|
||||||
is built to address a non-specific range of potential use
|
|
||||||
cases or functions. Typically, it is rare that particular
|
|
||||||
workloads determine the design or configuration of massively
|
|
||||||
scalable clouds. The massively scalable cloud is most often
|
|
||||||
built as a platform for a variety of workloads. Because private
|
|
||||||
organizations rarely require or have the resources for them,
|
|
||||||
massively scalable OpenStack clouds are generally built as
|
|
||||||
commercial, public cloud offerings.</para>
|
|
||||||
<para>Services provided by a massively scalable OpenStack cloud
|
|
||||||
include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Virtual-machine disk image library</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Raw block storage</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>File or object storage</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Firewall functionality</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Load balancing functionality</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Private (non-routable) and public (floating) IP
|
|
||||||
addresses</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Virtualized network topologies</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Software bundles</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Virtual compute resources</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Like a general purpose cloud, the instances deployed in a
|
|
||||||
massively scalable OpenStack cloud do not necessarily use
|
|
||||||
any specific aspect of the cloud offering (compute, network,
|
|
||||||
or storage). As the cloud grows in scale, the number of
|
|
||||||
workloads can cause stress on all the cloud
|
|
||||||
components. This adds further stresses to supporting
|
|
||||||
infrastructure such as databases and message brokers. The
|
|
||||||
architecture design for such a cloud must account for these
|
|
||||||
performance pressures without negatively impacting user
|
|
||||||
experience.</para>
|
|
||||||
|
|
||||||
<xi:include href="massively_scalable/section_user_requirements_massively_scalable.xml"/>
|
|
||||||
<xi:include href="massively_scalable/section_tech_considerations_massively_scalable.xml"/>
|
|
||||||
<xi:include href="massively_scalable/section_operational_considerations_massively_scalable.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,34 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="multi_site">
|
|
||||||
<title>Multi-site</title>
|
|
||||||
|
|
||||||
<para>OpenStack is capable of running in a multi-region
|
|
||||||
configuration. This enables some parts of OpenStack to
|
|
||||||
effectively manage a group of sites as a single cloud.</para>
|
|
||||||
<para>Some use cases that might indicate a need for a multi-site
|
|
||||||
deployment of OpenStack include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>An organization with a diverse geographic
|
|
||||||
footprint.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Geo-location sensitive data.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Data locality, in which specific data or
|
|
||||||
functionality should be close to users.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<xi:include href="multi_site/section_user_requirements_multi_site.xml"/>
|
|
||||||
<xi:include href="multi_site/section_tech_considerations_multi_site.xml"/>
|
|
||||||
<xi:include href="multi_site/section_operational_considerations_multi_site.xml"/>
|
|
||||||
<xi:include href="multi_site/section_architecture_multi_site.xml"/>
|
|
||||||
<xi:include href="multi_site/section_prescriptive_examples_multi_site.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,152 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="network_focus">
|
|
||||||
<title>Network focused</title>
|
|
||||||
<para>All OpenStack deployments depend on network communication in order
|
|
||||||
to function properly due to its service-based nature. In some cases,
|
|
||||||
however, the network elevates beyond simple
|
|
||||||
infrastructure. This chapter discusses architectures that are more
|
|
||||||
reliant or focused on network services. These architectures depend
|
|
||||||
on the network infrastructure and require
|
|
||||||
network services that perform reliably in order to satisfy user and
|
|
||||||
application requirements.</para>
|
|
||||||
<para>Some possible use cases include:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Content delivery network</term>
|
|
||||||
<listitem>
|
|
||||||
<para>This includes streaming video, viewing photographs, or
|
|
||||||
accessing any other cloud-based data repository distributed to
|
|
||||||
a large number of end users. Network configuration affects
|
|
||||||
latency, bandwidth, and the distribution of instances. Therefore,
|
|
||||||
it impacts video streaming. Not all video streaming is
|
|
||||||
consumer-focused. For example, multicast videos (used for media,
|
|
||||||
press conferences, corporate presentations, and web conferencing
|
|
||||||
services) can also use a content delivery network.
|
|
||||||
The location of the video repository and its relationship to end
|
|
||||||
users affects content delivery. Network throughput of the back-end
|
|
||||||
systems, as well as the WAN architecture and the cache methodology,
|
|
||||||
also affect performance.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Network management functions</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Use this cloud to provide network service functions built to
|
|
||||||
support the delivery of back-end network services such as DNS,
|
|
||||||
NTP, or SNMP.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Network service offerings</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Use this cloud to run customer-facing network tools to
|
|
||||||
support services. Examples include VPNs, MPLS private networks,
|
|
||||||
and GRE tunnels.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Web portals or web services</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Web servers are a common application for cloud services,
|
|
||||||
and we recommend an understanding of their network requirements.
|
|
||||||
The network requires scaling out to meet user demand and deliver
|
|
||||||
web pages with a minimum latency. Depending on the details of
|
|
||||||
the portal architecture, consider the internal east-west and
|
|
||||||
north-south network bandwidth.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>High speed and high volume transactional systems</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
These types of applications are sensitive to network
|
|
||||||
configurations. Examples include financial systems,
|
|
||||||
credit card transaction applications, and trading and other
|
|
||||||
extremely high volume systems. These systems are sensitive
|
|
||||||
to network jitter and latency. They must balance a high volume
|
|
||||||
of East-West and North-South network traffic to
|
|
||||||
maximize efficiency of the data delivery.
|
|
||||||
Many of these systems must access large, high performance
|
|
||||||
database back ends.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>High availability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>These types of use cases are dependent on the proper sizing
|
|
||||||
of the network to maintain replication of data between sites for
|
|
||||||
high availability. If one site becomes unavailable, the extra
|
|
||||||
sites can serve the displaced load until the original site
|
|
||||||
returns to service. It is important to size network capacity
|
|
||||||
to handle the desired loads.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Big data</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Clouds used for the management and collection of big data
|
|
||||||
(data ingest) have a significant demand on network resources.
|
|
||||||
Big data often uses partial replicas of the data to maintain
|
|
||||||
integrity over large distributed clouds. Other big data
|
|
||||||
applications that require a large amount of network resources
|
|
||||||
are Hadoop, Cassandra, NuoDB, Riak, and other NoSQL and
|
|
||||||
distributed databases.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Virtual desktop infrastructure (VDI)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>This use case is sensitive to network congestion, latency,
|
|
||||||
jitter, and other network characteristics. Like video streaming,
|
|
||||||
the user experience is important. However, unlike video
|
|
||||||
streaming, caching is not an option to offset the network issues.
|
|
||||||
VDI requires both upstream and downstream traffic and cannot rely
|
|
||||||
on caching for the delivery of the application to the end user.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Voice over IP (VoIP)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>This is sensitive to network congestion, latency, jitter,
|
|
||||||
and other network characteristics. VoIP has a symmetrical traffic
|
|
||||||
pattern and it requires network quality of service (QoS) for best
|
|
||||||
performance. In addition, you can implement active queue management
|
|
||||||
to deliver voice and multimedia content. Users are sensitive to
|
|
||||||
latency and jitter fluctuations and can detect them at very low
|
|
||||||
levels.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Video Conference or web conference</term>
|
|
||||||
<listitem>
|
|
||||||
<para>This is sensitive to network congestion, latency, jitter,
|
|
||||||
and other network characteristics. Video Conferencing has a
|
|
||||||
symmetrical traffic pattern, but unless the network is on an
|
|
||||||
MPLS private network, it cannot use network quality of service
|
|
||||||
(QoS) to improve performance. Similar to VoIP, users are
|
|
||||||
sensitive to network performance issues even at low levels.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>High performance computing (HPC)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>This is a complex use case that requires careful
|
|
||||||
consideration of the traffic flows and usage patterns to address
|
|
||||||
the needs of cloud clusters. It has high east-west traffic
|
|
||||||
patterns for distributed computing, but there can be substantial
|
|
||||||
north-south traffic depending on the specific application.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
|
|
||||||
<xi:include href="network_focus/section_user_requirements_network_focus.xml"/>
|
|
||||||
<xi:include href="network_focus/section_tech_considerations_network_focus.xml"/>
|
|
||||||
<xi:include href="network_focus/section_operational_considerations_network_focus.xml"/>
|
|
||||||
<xi:include href="network_focus/section_architecture_network_focus.xml"/>
|
|
||||||
<xi:include href="network_focus/section_prescriptive_examples_network_focus.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,128 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-design-references">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>References</title>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://ec.europa.eu/justice/data-protection/">Data
|
|
||||||
Protection framework of the European Union</link>: Guidance on
|
|
||||||
Data Protection laws governed by the EU.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://www.internetsociety.org/deploy360/blog/2014/05/goodbye-ipv4-iana-starts-allocating-final-address-blocks/">Depletion
|
|
||||||
of IPv4 Addresses</link>: Describes how the depletion of IPv4 addresses
|
|
||||||
makes the migration to IPv6 inevitable.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://www.garrettcom.com/techsupport/papers/ethernet_switch_reliability.pdf">Ethernet
|
|
||||||
Switch Reliability</link>: Research white paper on Ethernet Switch
|
|
||||||
reliability.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://www.finra.org/Industry/Regulation/FINRARules/">Financial
|
|
||||||
Industry Regulatory Authority</link>: Requirements of the
|
|
||||||
Financial Industry Regulatory Authority in the USA.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/cli-reference/content/chapter_cli-glance-property.html">Image
|
|
||||||
Service property keys</link>: Glance API property keys allow the
|
|
||||||
administrator to attach custom characteristics to images.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link xlink:href="http://libguestfs.org">LibGuestFS
|
|
||||||
Documentation</link>: Official LibGuestFS documentation.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/content/logging_monitoring.html">Logging
|
|
||||||
and Monitoring</link>: Official OpenStack Operations
|
|
||||||
documentation.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link xlink:href="http://manageiq.org/">ManageIQ Cloud Management
|
|
||||||
Platform</link>: An Open Source Cloud Management Platform for
|
|
||||||
managing multiple clouds.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://www.n-tron.com/pdf/network_availability.pdf">N-Tron
|
|
||||||
Network Availability</link>: Research white paper on network
|
|
||||||
availability.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://davejingtian.org/2014/03/30/nested-kvm-just-for-fun">Nested
|
|
||||||
KVM</link>: Post on how to nest KVM under KVM.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link xlink:href="http://www.opencompute.org/">Open Compute
|
|
||||||
Project</link>: The Open Compute Project Foundation's mission is
|
|
||||||
to design and enable the delivery of the most efficient server,
|
|
||||||
storage and data center hardware designs for scalable
|
|
||||||
computing.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/content/flavors.html">OpenStack
|
|
||||||
Flavors</link>: Official OpenStack documentation.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/ha-guide/">OpenStack
|
|
||||||
High Availability Guide</link>: Information on how to provide
|
|
||||||
redundancy for the OpenStack components.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="https://wiki.openstack.org/wiki/HypervisorSupportMatrix">OpenStack
|
|
||||||
Hypervisor Support Matrix</link>: Matrix of supported hypervisors
|
|
||||||
and capabilities when used with OpenStack.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/developer/swift/replication_network.html">OpenStack
|
|
||||||
Object Store (Swift) Replication Reference</link>: Developer
|
|
||||||
documentation of Swift replication.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/">OpenStack
|
|
||||||
Operations Guide</link>: The OpenStack Operations Guide provides
|
|
||||||
information on setting up and installing OpenStack.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/security-guide/">OpenStack
|
|
||||||
Security Guide</link>: The OpenStack Security Guide provides
|
|
||||||
information on securing OpenStack deployments.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="http://www.openstack.org/marketplace/training">OpenStack
|
|
||||||
Training Marketplace</link>: The OpenStack Market for training and
|
|
||||||
Vendors providing training on OpenStack.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="https://wiki.openstack.org/wiki/Pci_passthrough#How_to_check_PCI_status_with_PCI_api_paches">PCI
|
|
||||||
passthrough</link>: The PCI API patches extend the
|
|
||||||
servers/os-hypervisor to show PCI information for instance and
|
|
||||||
compute node, and also provide a resource endpoint to show PCI
|
|
||||||
information.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
xlink:href="https://wiki.openstack.org/wiki/TripleO">TripleO</link>:
|
|
||||||
TripleO is a program aimed at installing, upgrading and operating
|
|
||||||
OpenStack clouds using OpenStack's own cloud facilities as the
|
|
||||||
foundation.
|
|
||||||
</para>
|
|
||||||
</chapter>
|
|
@ -1,67 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="specialized">
|
|
||||||
<title>Specialized cases</title>
|
|
||||||
<para>Although most OpenStack architecture designs fall into one
|
|
||||||
of the seven major scenarios outlined in other sections
|
|
||||||
(compute focused, network focused, storage focused, general
|
|
||||||
purpose, multi-site, hybrid cloud, and massively scalable),
|
|
||||||
there are a few use cases that do not fit into these categories.
|
|
||||||
This section discusses these specialized cases and provides
|
|
||||||
some additional details and design considerations
|
|
||||||
for each use case:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
linkend="specialized-networking-example">Specialized
|
|
||||||
networking</link>: describes running
|
|
||||||
networking-oriented software that may involve reading
|
|
||||||
packets directly from the wire or participating in
|
|
||||||
routing protocols.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
linkend="software-defined-networking-sdn">Software-defined
|
|
||||||
networking (SDN)</link>: describes both
|
|
||||||
running an SDN controller from within OpenStack as well
|
|
||||||
as participating in a software-defined network.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
linkend="desktop-as-a-service">Desktop-as-a-Service</link>:
|
|
||||||
describes running a virtualized desktop environment
|
|
||||||
in a cloud (<glossterm>Desktop-as-a-Service</glossterm>).
|
|
||||||
This applies to private and public clouds.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link
|
|
||||||
linkend="arch-guide-openstack-on-openstack">OpenStack on
|
|
||||||
OpenStack</link>: describes building a multi-tiered cloud by
|
|
||||||
running OpenStack on top of an OpenStack installation.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="specialized-hardware">Specialized
|
|
||||||
hardware</link>: describes the use of specialized
|
|
||||||
hardware devices from within the OpenStack environment.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<xi:include href="specialized/section_multi_hypervisor_specialized.xml"/>
|
|
||||||
<xi:include href="specialized/section_networking_specialized.xml"/>
|
|
||||||
<xi:include href="specialized/section_software_defined_networking_specialized.xml"/>
|
|
||||||
<xi:include href="specialized/section_desktop_as_a_service_specialized.xml"/>
|
|
||||||
<xi:include href="specialized/section_openstack_on_openstack_specialized.xml"/>
|
|
||||||
<xi:include href="specialized/section_hardware_specialized.xml"/>
|
|
||||||
</chapter>
|
|
@ -1,78 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<chapter xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="storage_focus">
|
|
||||||
<title>Storage focused</title>
|
|
||||||
|
|
||||||
<para>Cloud storage is a model of data storage that stores digital
|
|
||||||
data in logical pools and physical storage that spans
|
|
||||||
across multiple servers and locations. Cloud storage commonly
|
|
||||||
refers to a hosted object storage service, however the term
|
|
||||||
also includes other types of data storage that are
|
|
||||||
available as a service, for example block storage.</para>
|
|
||||||
<para>Cloud storage runs on virtualized infrastructure and
|
|
||||||
resembles broader cloud computing in terms of accessible
|
|
||||||
interfaces, elasticity, scalability, multi-tenancy, and
|
|
||||||
metered resources. You can use cloud storage services from
|
|
||||||
an off-premises service or deploy on-premises.</para>
|
|
||||||
<para>Cloud storage consists of many distributed, synonymous
|
|
||||||
resources, which are often referred to as integrated
|
|
||||||
storage clouds. Cloud storage is highly fault tolerant through
|
|
||||||
redundancy and the distribution of data. It is highly durable
|
|
||||||
through the creation of versioned copies, and can be
|
|
||||||
consistent with regard to data replicas.</para>
|
|
||||||
<para>At large scale, management of data operations is
|
|
||||||
a resource intensive process for an organization. Hierarchical
|
|
||||||
storage management (HSM) systems and data grids help
|
|
||||||
annotate and report a baseline data valuation to make
|
|
||||||
intelligent decisions and automate data decisions. HSM enables
|
|
||||||
automated tiering and movement, as well as orchestration
|
|
||||||
of data operations. A data grid is an architecture, or an evolving set of
|
|
||||||
services, that brings together sets of
|
|
||||||
services enabling users to manage large data sets.</para>
|
|
||||||
<para>Example applications deployed with cloud
|
|
||||||
storage characteristics:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Active archive, backups and hierarchical storage
|
|
||||||
management.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>General content storage and synchronization. An
|
|
||||||
example of this is private dropbox.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Data analytics with parallel file systems.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Unstructured data store for services. For example,
|
|
||||||
social media back-end storage.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Persistent block storage.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Operating system and application image store.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Media streaming.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Databases.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Content distribution.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Cloud storage peering.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<xi:include href="storage_focus/section_tech_considerations_storage_focus.xml"/>
|
|
||||||
<xi:include href="storage_focus/section_operational_considerations_storage_focus.xml"/>
|
|
||||||
<xi:include href="storage_focus/section_architecture_storage_focus.xml"/>
|
|
||||||
<xi:include href="storage_focus/section_prescriptive_examples_storage_focus.xml"/>
|
|
||||||
|
|
||||||
</chapter>
|
|
@ -1,268 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-design-architecture-hardware">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Architecture</title>
|
|
||||||
<para>The hardware selection covers three areas:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Compute</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Network</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Storage</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Compute-focused OpenStack clouds have high demands on processor and
|
|
||||||
memory resources, and require hardware that can handle these demands.
|
|
||||||
Consider the following factors when selecting compute (server) hardware:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Server density</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Resource capacity</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Expandability</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Cost</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Weigh these considerations against each other to determine the
|
|
||||||
best design for the desired purpose. For example, increasing server density
|
|
||||||
means sacrificing resource capacity or expandability.</para>
|
|
||||||
<para>A compute-focused cloud should have an emphasis on server hardware
|
|
||||||
that can offer more CPU sockets, more CPU cores, and more RAM. Network
|
|
||||||
connectivity and storage capacity are less critical.</para>
|
|
||||||
<para>When designing a compute-focused OpenStack architecture, you must
|
|
||||||
consider whether you intend to scale up or scale out.
|
|
||||||
Selecting a smaller number of larger hosts, or a
|
|
||||||
larger number of smaller hosts, depends on a combination of factors:
|
|
||||||
cost, power, cooling, physical rack and floor space, support-warranty,
|
|
||||||
and manageability.</para>
|
|
||||||
<para>Considerations for selecting hardware:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Most blade servers can support dual-socket multi-core CPUs. To
|
|
||||||
avoid this CPU limit, select <literal>full width</literal>
|
|
||||||
or <literal>full height</literal> blades.
|
|
||||||
Be aware, however, that this also decreases server density. For example,
|
|
||||||
high density blade servers such as HP BladeSystem or Dell PowerEdge
|
|
||||||
M1000e support up to 16 servers in only ten rack units. Using
|
|
||||||
half-height blades is twice as dense as using full-height blades,
|
|
||||||
which fit only eight servers per ten rack units.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>1U rack-mounted servers that occupy only a single rack
|
|
||||||
unit may offer greater server density than a blade server
|
|
||||||
solution. It is possible to place forty 1U servers in a rack, providing
|
|
||||||
space for the top of rack (ToR) switches, compared to 32 full width
|
|
||||||
blade servers.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>2U rack-mounted servers provide quad-socket, multi-core CPU
|
|
||||||
support, but with a corresponding decrease in server density (half
|
|
||||||
the density that 1U rack-mounted servers offer).</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Larger rack-mounted servers, such as 4U servers, often provide
|
|
||||||
even greater CPU capacity, commonly supporting four or even eight CPU
|
|
||||||
sockets. These servers have greater expandability, but such servers
|
|
||||||
have much lower server density and are often more expensive.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para><literal>Sled servers</literal> are rack-mounted servers that
|
|
||||||
support multiple
|
|
||||||
independent servers in a single 2U or 3U enclosure. These deliver higher
|
|
||||||
density as compared to typical 1U or 2U rack-mounted servers. For
|
|
||||||
example, many sled servers offer four independent dual-socket
|
|
||||||
nodes in 2U for a total of eight CPU sockets in 2U.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Consider these when choosing server hardware for a
|
|
||||||
compute-focused OpenStack design architecture:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Instance density</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Host density</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Power and cooling density</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<section xml:id="selecting-networking-hardware-arch">
|
|
||||||
<title>Selecting networking hardware</title>
|
|
||||||
<para>Some of the key considerations for networking hardware selection
|
|
||||||
include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Port count</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Port density</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Port speed</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Redundancy</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Power requirements</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>We recommend designing the network architecture using
|
|
||||||
a scalable network model that makes it easy to add capacity and
|
|
||||||
bandwidth. A good example of such a model is the leaf-spine model. In
|
|
||||||
this type of network design, it is possible to easily add additional
|
|
||||||
bandwidth as well as scale out to additional racks of gear. It is
|
|
||||||
important to select network hardware that supports the required
|
|
||||||
port count, port speed, and port density while also allowing for future
|
|
||||||
growth as workload demands increase. It is also important to evaluate
|
|
||||||
where in the network architecture it is valuable to provide redundancy.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="os-and-hypervisor-arch">
|
|
||||||
<title>Operating system and hypervisor</title>
|
|
||||||
<para>The selection of operating system (OS) and hypervisor has a
|
|
||||||
significant impact on the end point design.</para>
|
|
||||||
<para>OS and hypervisor selection impact the following areas:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Cost</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Supportability</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Management tools</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Scale and performance</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Security</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Supported features</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Interoperability</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="openstack-components-arch">
|
|
||||||
<title>OpenStack components</title>
|
|
||||||
<para>The selection of OpenStack components is important.
|
|
||||||
There are certain components that are required, for example the compute
|
|
||||||
and image services, but others, such as the Orchestration service, may not
|
|
||||||
be present.</para>
|
|
||||||
<para>For a compute-focused OpenStack design architecture, the
|
|
||||||
following components may be present:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Identity (keystone)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Dashboard (horizon)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Compute (nova)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Object Storage (swift)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Image (glance)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Networking (neutron)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Orchestration (heat)</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<note>
|
|
||||||
<para>A compute-focused design is less likely to include OpenStack Block
|
|
||||||
Storage. However, there may be some situations where the need for
|
|
||||||
performance requires a block storage component to improve data I/O.</para>
|
|
||||||
</note>
|
|
||||||
<para>The exclusion of certain OpenStack components might also limit the
|
|
||||||
functionality of other components. If a design includes
|
|
||||||
the Orchestration service but excludes the Telemetry service, then
|
|
||||||
the design cannot take advantage of Orchestration's auto
|
|
||||||
scaling functionality as this relies on information from Telemetry.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="networking-software-arch">
|
|
||||||
<title>Networking software</title>
|
|
||||||
<para>OpenStack Networking provides a wide variety of networking services
|
|
||||||
for instances. There are many additional networking software packages
|
|
||||||
that might be useful to manage the OpenStack components themselves.
|
|
||||||
The <citetitle>OpenStack High Availability Guide</citetitle>
|
|
||||||
(<link xlink:href="http://docs.openstack.org/ha-guide/">http://docs.openstack.org/ha-guide/</link>)
|
|
||||||
describes some of these software packages in more detail.
|
|
||||||
</para>
|
|
||||||
<para>For a compute-focused OpenStack cloud, the OpenStack infrastructure
|
|
||||||
components must be highly available. If the design does not
|
|
||||||
include hardware load balancing, you must add networking software packages,
|
|
||||||
for example, HAProxy.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="management-software-arch">
|
|
||||||
<title>Management software</title>
|
|
||||||
<para>The selected supplemental software solution affects
|
|
||||||
the overall OpenStack cloud design. This includes software for
|
|
||||||
providing clustering, logging, monitoring and alerting.</para>
|
|
||||||
<para>The availability requirements of the design are the main determiner
|
|
||||||
for the inclusion of clustering software, such as Corosync or Pacemaker.</para>
|
|
||||||
<para>Operational considerations determine the requirements for logging,
|
|
||||||
monitoring, and alerting. Each of these sub-categories includes
|
|
||||||
various options.</para>
|
|
||||||
<para>Some other potential design impacts include:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>OS-hypervisor combination</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Ensure that the selected logging,
|
|
||||||
monitoring, or alerting tools support the proposed OS-hypervisor
|
|
||||||
combination.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Network hardware</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The logging, monitoring, and alerting software
|
|
||||||
must support the network hardware selection.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="database-software-arch">
|
|
||||||
<title>Database software</title>
|
|
||||||
<para>A large majority of OpenStack components require access to
|
|
||||||
back-end database services to store state and configuration
|
|
||||||
information. Select an appropriate back-end database that
|
|
||||||
satisfies the availability and fault tolerance requirements of the
|
|
||||||
OpenStack services. OpenStack services support connecting
|
|
||||||
to any database that the SQLAlchemy Python drivers support,
|
|
||||||
however most common database deployments make use of MySQL or some
|
|
||||||
variation of it. We recommend that you make the database that provides
|
|
||||||
back-end services within a compute-focused cloud highly
|
|
||||||
available. Some of the more common software solutions include Galera,
|
|
||||||
MariaDB, and MySQL with multi-master replication.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
</section>
|
|
@ -1,84 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="operational-considerations-compute-focus">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Operational considerations</title>
|
|
||||||
<para>There are a number of operational considerations that affect the
|
|
||||||
design of compute-focused OpenStack clouds, including:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Enforcing strict API availability requirements
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Understanding and dealing with failure scenarios
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Managing host maintenance schedules
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Service-level agreements (SLAs) are contractual obligations that
|
|
||||||
ensure the availability of a service. When designing an OpenStack cloud,
|
|
||||||
factoring in promises of availability implies a certain level of
|
|
||||||
redundancy and resiliency.</para>
|
|
||||||
|
|
||||||
<section xml:id="montioring-compute-focus">
|
|
||||||
<title>Monitoring</title>
|
|
||||||
<para>OpenStack clouds require appropriate monitoring platforms
|
|
||||||
to catch and manage errors.</para>
|
|
||||||
<note>
|
|
||||||
<para>We recommend leveraging existing monitoring systems
|
|
||||||
to see if they are able to effectively monitor an
|
|
||||||
OpenStack environment.</para>
|
|
||||||
</note>
|
|
||||||
<para>Specific meters that are critically important to capture
|
|
||||||
include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Image disk utilization</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Response time to the Compute API</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="capacity-planning-operational">
|
|
||||||
<title>Capacity planning</title>
|
|
||||||
<para>Adding extra capacity to an OpenStack cloud is a
|
|
||||||
horizontal scaling process.</para>
|
|
||||||
<para>We recommend similar (or the same) CPUs
|
|
||||||
when adding extra nodes to the environment. This reduces
|
|
||||||
the chance of breaking live-migration features if they are
|
|
||||||
present. Scaling out hypervisor hosts also has a direct effect
|
|
||||||
on network and other data center resources. We recommend you
|
|
||||||
factor in this increase when reaching rack capacity or when requiring
|
|
||||||
extra network switches.</para>
|
|
||||||
<para>Changing the internal components of a Compute host to account for
|
|
||||||
increases in demand is a process known as vertical scaling.
|
|
||||||
Swapping a CPU for one with more cores, or
|
|
||||||
increasing the memory in a server, can help add extra
|
|
||||||
capacity for running applications.</para>
|
|
||||||
<para>Another option is to assess the average workloads and
|
|
||||||
increase the number of instances that can run within the
|
|
||||||
compute environment by adjusting the overcommit ratio.</para>
|
|
||||||
<note>
|
|
||||||
<para>It is important to remember that changing the CPU
|
|
||||||
overcommit ratio can have a detrimental effect and cause
|
|
||||||
a potential increase in noisy neighbor issues.</para>
|
|
||||||
</note>
|
|
||||||
<para>The added risk of increasing the overcommit ratio is that
|
|
||||||
more instances fail when a compute host fails. We do not recommend
|
|
||||||
that you increase the CPU overcommit ratio in compute-focused
|
|
||||||
OpenStack design architecture, as it can increase the potential
|
|
||||||
for noisy neighbor issues.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,162 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="prescriptive-example-compute-focus">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Prescriptive examples</title>
|
|
||||||
<para>The Conseil Européen pour la Recherche Nucléaire (CERN),
|
|
||||||
also known as the European Organization for Nuclear Research,
|
|
||||||
provides particle accelerators and other infrastructure for
|
|
||||||
high-energy physics research.</para>
|
|
||||||
<para>As of 2011, CERN operated these two compute centers in Europe
|
|
||||||
with plans to add a third.</para>
|
|
||||||
<informaltable rules="all">
|
|
||||||
<col width="40%" />
|
|
||||||
<col width="60%" />
|
|
||||||
<thead>
|
|
||||||
<tr><th>Data center</th><th>Approximate capacity</th></tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>Geneva, Switzerland</td>
|
|
||||||
<td>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem><para>3.5 megawatts</para></listitem>
|
|
||||||
<listitem><para>91000 cores</para></listitem>
|
|
||||||
<listitem><para>120 PB HDD</para></listitem>
|
|
||||||
<listitem><para>100 PB Tape</para></listitem>
|
|
||||||
<listitem><para>310 TB Memory</para></listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Budapest, Hungary</td>
|
|
||||||
<td>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem><para>2.5 megawatts</para></listitem>
|
|
||||||
<listitem><para>20000 cores</para></listitem>
|
|
||||||
<listitem><para>6 PB HDD</para></listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</informaltable>
|
|
||||||
<para>To support a growing number of compute-heavy users of
|
|
||||||
experiments related to the Large Hadron Collider (LHC), CERN
|
|
||||||
ultimately elected to deploy an OpenStack cloud using
|
|
||||||
Scientific Linux and RDO. This effort aimed to simplify the
|
|
||||||
management of the center's compute resources with a view to
|
|
||||||
doubling compute capacity through the addition of a
|
|
||||||
data center in 2013 while maintaining the same
|
|
||||||
levels of compute staff.</para>
|
|
||||||
<para>The CERN solution uses <glossterm baseform="cell">cells</glossterm>
|
|
||||||
for segregation of compute
|
|
||||||
resources and for transparently scaling between different data
|
|
||||||
centers. This decision meant trading off support for security
|
|
||||||
groups and live migration. In addition, they must manually replicate
|
|
||||||
some details, like flavors, across cells. In
|
|
||||||
spite of these drawbacks, cells provide the
|
|
||||||
required scale while exposing a single public API endpoint to
|
|
||||||
users.</para>
|
|
||||||
<para>CERN created a compute cell for each of the two original data
|
|
||||||
centers and created a third when it added a new data center
|
|
||||||
in 2013. Each cell contains three availability zones to
|
|
||||||
further segregate compute resources and at least three
|
|
||||||
RabbitMQ message brokers configured for clustering with
|
|
||||||
mirrored queues for high availability.</para>
|
|
||||||
<para>The API cell, which resides behind a HAProxy load balancer,
|
|
||||||
is in the data center in Switzerland and directs API
|
|
||||||
calls to compute cells using a customized variation of the
|
|
||||||
cell scheduler. The customizations allow certain workloads to
|
|
||||||
route to a specific data center or all data centers,
|
|
||||||
with cell RAM availability determining cell selection in the
|
|
||||||
latter case.</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in" fileref="../figures/Generic_CERN_Example.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
<para>There is also some customization of the filter scheduler
|
|
||||||
that handles placement within the cells:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry><term>ImagePropertiesFilter</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Provides special handling
|
|
||||||
depending on the guest operating system in use
|
|
||||||
(Linux-based or Windows-based).</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry><term>ProjectsToAggregateFilter</term>
|
|
||||||
<listitem><para>Provides special
|
|
||||||
handling depending on which project the instance is
|
|
||||||
associated with.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry><term>default_schedule_zones</term>
|
|
||||||
<listitem><para>Allows the selection of
|
|
||||||
multiple default availability zones, rather than a
|
|
||||||
single default.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>A central database team manages the MySQL database server in each cell
|
|
||||||
in an active/passive configuration with a NetApp storage back end.
|
|
||||||
Backups run every 6 hours.</para>
|
|
||||||
|
|
||||||
<section xml:id="network-architecture">
|
|
||||||
<title>Network architecture</title>
|
|
||||||
<para>To integrate with existing networking infrastructure, CERN
|
|
||||||
made customizations to legacy networking (nova-network). This was in the
|
|
||||||
form of a driver to integrate with CERN's existing database
|
|
||||||
for tracking MAC and IP address assignments.</para>
|
|
||||||
<para>The driver facilitates selection of a MAC address and IP for
|
|
||||||
new instances based on the compute node where the scheduler places
|
|
||||||
the instance.</para>
|
|
||||||
<para>The driver considers the compute node where the scheduler
|
|
||||||
placed an instance and selects a MAC address and IP
|
|
||||||
from the pre-registered list associated with that node in the
|
|
||||||
database. The database updates to reflect the address assignment to
|
|
||||||
that instance.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="storage-architecture">
|
|
||||||
<title>Storage architecture</title>
|
|
||||||
<para>CERN deploys the OpenStack Image service in the API cell and
|
|
||||||
configures it to expose version 1 (V1) of the API. This also requires
|
|
||||||
the image registry. The storage back end in
|
|
||||||
use is a 3 PB Ceph cluster.</para>
|
|
||||||
<para>CERN maintains a small set of Scientific Linux 5 and 6 images onto
|
|
||||||
which orchestration tools can place applications. Puppet manages
|
|
||||||
instance configuration and customization.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="monitoring">
|
|
||||||
<title>Monitoring</title>
|
|
||||||
<para>CERN does not require direct billing, but uses the Telemetry service
|
|
||||||
to perform metering for the purposes of adjusting
|
|
||||||
project quotas. CERN uses a sharded, replicated MongoDB back end.
|
|
||||||
To spread API load, CERN deploys instances of the nova-api service
|
|
||||||
within the child cells for Telemetry to query
|
|
||||||
against. This also requires the configuration of supporting services
|
|
||||||
such as keystone, glance-api, and glance-registry in the child cells.
|
|
||||||
</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Generic_CERN_Architecture.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
<para>
|
|
||||||
Additional monitoring tools in use include <link
|
|
||||||
xlink:href="http://flume.apache.org/">Flume</link>, <link
|
|
||||||
xlink:href="http://www.elasticsearch.org/">Elasticsearch</link>,
|
|
||||||
<link
|
|
||||||
xlink:href="http://www.elasticsearch.org/overview/kibana/">Kibana</link>,
|
|
||||||
and the CERN-developed <link
|
|
||||||
xlink:href="http://lemon.web.cern.ch/lemon/index.shtml">Lemon</link>
|
|
||||||
project.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,275 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="technical-considerations-compute-focus">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Technical considerations</title>
|
|
||||||
<para>In a compute-focused OpenStack cloud, the type of instance
|
|
||||||
workloads you provision heavily influences technical
|
|
||||||
decision making.</para>
|
|
||||||
<para>Public and private clouds require deterministic capacity
|
|
||||||
planning to support elastic growth in order to meet user SLA
|
|
||||||
expectations. Deterministic capacity planning is the path to
|
|
||||||
predicting the effort and expense of making a given process
|
|
||||||
perform consistently. This process is important because,
|
|
||||||
when a service becomes a critical part of a user's
|
|
||||||
infrastructure, the user's experience links directly to the SLAs of
|
|
||||||
the cloud itself.</para>
|
|
||||||
<para>There are two aspects of capacity planning to consider:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Planning the initial deployment footprint</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Planning expansion of the environment to stay ahead of the
|
|
||||||
demands of cloud users</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Begin planning an initial OpenStack deployment footprint with
|
|
||||||
estimations of expected uptake, and existing infrastructure workloads.</para>
|
|
||||||
<para>The starting point is the core count of the cloud. By
|
|
||||||
applying relevant ratios, the user can gather information
|
|
||||||
about:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>The number of expected concurrent instances:
|
|
||||||
(overcommit fraction × cores) / virtual cores per instance</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Required storage: flavor disk size × number of instances</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>These ratios determine the amount of
|
|
||||||
additional infrastructure needed to support the cloud. For
|
|
||||||
example, consider a situation in which you require 1600
|
|
||||||
instances, each with 2 vCPU and 50 GB of storage. Assuming the
|
|
||||||
default overcommit rate of 16:1, working out the math provides
|
|
||||||
an equation of:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>1600 = (16 × (number of physical cores)) / 2</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Storage required = 50 GB × 1600</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>On the surface, the equations reveal the need for 200
|
|
||||||
physical cores and 80 TB of storage for
|
|
||||||
<filename>/var/lib/nova/instances/</filename>. However,
|
|
||||||
it is also important to
|
|
||||||
look at patterns of usage to estimate the load that the API
|
|
||||||
services, database servers, and queue servers are likely to
|
|
||||||
encounter.</para>
|
|
||||||
<para>Aside from the creation and termination of instances, consider the
|
|
||||||
impact of users accessing the service,
|
|
||||||
particularly on nova-api and its associated database. Listing
|
|
||||||
instances gathers a great deal of information and given the
|
|
||||||
frequency with which users run this operation, a cloud with a
|
|
||||||
large number of users can increase the load significantly.
|
|
||||||
This can even occur unintentionally. For example, the
|
|
||||||
OpenStack Dashboard instances tab refreshes the list of
|
|
||||||
instances every 30 seconds, so leaving it open in a browser
|
|
||||||
window can cause unexpected load.</para>
|
|
||||||
<para>Consideration of these factors can help determine how many
|
|
||||||
cloud controller cores you require. A server with 8 CPU cores
|
|
||||||
and 8 GB of RAM would be sufficient for a rack of
|
|
||||||
compute nodes, given the above caveats.</para>
|
|
||||||
<para>Key hardware specifications are also crucial to the
|
|
||||||
performance of user instances. Be sure to consider budget and
|
|
||||||
performance needs, including storage performance
|
|
||||||
(spindles/core), memory availability (RAM/core), network
|
|
||||||
bandwidth (Gbps/core), and overall CPU performance
|
|
||||||
(CPU/core).</para>
|
|
||||||
<para>The cloud resource calculator is a useful tool in examining
|
|
||||||
the impacts of different hardware and instance load outs. See:
|
|
||||||
<link xlink:href="https://github.com/noslzzp/cloud-resource-calculator/blob/master/cloud-resource-calculator.ods">https://github.com/noslzzp/cloud-resource-calculator/blob/master/cloud-resource-calculator.ods</link>
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<section xml:id="expansion-planning-compute-focus">
|
|
||||||
<title>Expansion planning</title>
|
|
||||||
<para>A key challenge for planning the expansion of cloud
|
|
||||||
compute services is the elastic nature of cloud infrastructure
|
|
||||||
demands.</para>
|
|
||||||
<para>Planning for expansion is a balancing act.
|
|
||||||
Planning too conservatively can lead to unexpected
|
|
||||||
oversubscription of the cloud and dissatisfied users. Planning
|
|
||||||
for cloud expansion too aggressively can lead to unexpected
|
|
||||||
underutilization of the cloud and funds spent unnecessarily on operating
|
|
||||||
infrastructure.</para>
|
|
||||||
<para>The key is to carefully monitor the trends in
|
|
||||||
cloud usage over time. The intent is to measure the
|
|
||||||
consistency with which you deliver services, not the
|
|
||||||
average speed or capacity of the cloud. Using this information
|
|
||||||
to model capacity performance enables users to more
|
|
||||||
accurately determine the current and future capacity of the
|
|
||||||
cloud.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="cpu-and-ram-compute-focus">
|
|
||||||
<title>CPU and RAM</title>
|
|
||||||
<para>OpenStack enables users to overcommit CPU and RAM on
|
|
||||||
compute nodes. This allows an increase in the number of
|
|
||||||
instances running on the cloud at the cost of reducing the
|
|
||||||
performance of the instances. OpenStack Compute uses the
|
|
||||||
following ratios by default:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>CPU allocation ratio: 16:1</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>RAM allocation ratio: 1.5:1</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>The default CPU allocation ratio of 16:1 means that the
|
|
||||||
scheduler allocates up to 16 virtual cores per physical core.
|
|
||||||
For example, if a physical node has 12 cores, the scheduler
|
|
||||||
sees 192 available virtual cores. With typical flavor
|
|
||||||
definitions of 4 virtual cores per instance, this ratio would
|
|
||||||
provide 48 instances on a physical node.</para>
|
|
||||||
<para>Similarly, the default RAM allocation ratio of 1.5:1 means
|
|
||||||
that the scheduler allocates instances to a physical node as
|
|
||||||
long as the total amount of RAM associated with the instances
|
|
||||||
is less than 1.5 times the amount of RAM available on the
|
|
||||||
physical node.</para>
|
|
||||||
<para>You must select the appropriate CPU and RAM allocation ratio
|
|
||||||
based on particular use cases.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="additional-hardware-compute-focus">
|
|
||||||
<title>Additional hardware</title>
|
|
||||||
<para>Certain use cases may benefit from exposure to additional
|
|
||||||
devices on the compute node. Examples might include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>High performance computing jobs that benefit from
|
|
||||||
the availability of graphics processing units (GPUs)
|
|
||||||
for general-purpose computing.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Cryptographic routines that benefit from the
|
|
||||||
availability of hardware random number generators to
|
|
||||||
avoid entropy starvation.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Database management systems that benefit from the
|
|
||||||
availability of SSDs for ephemeral storage to maximize
|
|
||||||
read/write performance.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Host aggregates group hosts that share similar
|
|
||||||
characteristics, which can include hardware similarities. The
|
|
||||||
addition of specialized hardware to a cloud deployment is
|
|
||||||
likely to add to the cost of each node, so consider carefully
|
|
||||||
whether all compute nodes, or
|
|
||||||
just a subset targeted by flavors, need the
|
|
||||||
additional customization to support the desired
|
|
||||||
workloads.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="utilization">
|
|
||||||
<title>Utilization</title>
|
|
||||||
<para>Infrastructure-as-a-Service offerings, including OpenStack,
|
|
||||||
use flavors to provide standardized views of virtual machine
|
|
||||||
resource requirements that simplify the problem of scheduling
|
|
||||||
instances while making the best use of the available physical
|
|
||||||
resources.</para>
|
|
||||||
<para>In order to facilitate packing of virtual machines onto
|
|
||||||
physical hosts, the default selection of flavors provides a
|
|
||||||
second largest flavor that is half the size
|
|
||||||
of the largest flavor in every dimension. It has half the
|
|
||||||
vCPUs, half the vRAM, and half the ephemeral disk space. The
|
|
||||||
next largest flavor is half that size again. The following figure
|
|
||||||
provides a visual representation of this concept for a general
|
|
||||||
purpose computing design:
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Compute_Tech_Bin_Packing_General1.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject></para>
|
|
||||||
<para>The following figure displays a CPU-optimized, packed server:
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Compute_Tech_Bin_Packing_CPU_optimized1.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject></para>
|
|
||||||
<para>These default flavors are well suited to typical configurations
|
|
||||||
of commodity server hardware. To maximize utilization,
|
|
||||||
however, it may be necessary to customize the flavors or
|
|
||||||
create new ones in order to better align instance sizes to the
|
|
||||||
available hardware.</para>
|
|
||||||
<para>Workload characteristics may also influence hardware choices
|
|
||||||
and flavor configuration, particularly where they present
|
|
||||||
different ratios of CPU versus RAM versus HDD
|
|
||||||
requirements.</para>
|
|
||||||
<para>For more information on Flavors see:
|
|
||||||
<link xlink:href="http://docs.openstack.org/openstack-ops/content/flavors.html">OpenStack Operations Guide: Flavors</link></para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="openstack-components-compute-focus">
|
|
||||||
<title>OpenStack components</title>
|
|
||||||
<para>Due to the nature of the workloads in this
|
|
||||||
scenario, a number of components are highly beneficial for
|
|
||||||
a Compute-focused cloud. This includes the typical OpenStack
|
|
||||||
components:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Compute (nova)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Image service (glance)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Identity (keystone)</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Also consider several specialized components:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para><glossterm>Orchestration</glossterm> (heat)</para>
|
|
||||||
<para>Given the nature of the
|
|
||||||
applications involved in this scenario, these are heavily
|
|
||||||
automated deployments. Making use of Orchestration is highly
|
|
||||||
beneficial in this case. You can script the deployment of a
|
|
||||||
batch of instances and the running of tests, but it
|
|
||||||
makes sense to use the Orchestration service
|
|
||||||
to handle all these actions.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Telemetry (ceilometer)</para>
|
|
||||||
<para>Telemetry and the alarms it generates support autoscaling
|
|
||||||
of instances using Orchestration. Users that are not using the
|
|
||||||
Orchestration service do not need to deploy the Telemetry
|
|
||||||
service and may choose to use external solutions to fulfill
|
|
||||||
their metering and monitoring requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Block Storage (cinder)</para>
|
|
||||||
<para>Due to the burst-able nature of the workloads and the
|
|
||||||
applications and instances that perform batch
|
|
||||||
processing, this cloud mainly uses memory or CPU, so
|
|
||||||
the need for add-on storage to each instance is not a likely
|
|
||||||
requirement. This does not mean that you do not use
|
|
||||||
OpenStack Block Storage (cinder) in the infrastructure, but
|
|
||||||
typically it is not a central component.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Networking</para>
|
|
||||||
<para>When choosing a networking platform, ensure that it either
|
|
||||||
works with all desired hypervisor and container technologies
|
|
||||||
and their OpenStack drivers, or that it includes an implementation of
|
|
||||||
an ML2 mechanism driver. You can mix networking platforms
|
|
||||||
that provide ML2 mechanism drivers.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
Before Width: | Height: | Size: 52 KiB |
Before Width: | Height: | Size: 39 KiB |
Before Width: | Height: | Size: 35 KiB |
Before Width: | Height: | Size: 118 KiB |
Before Width: | Height: | Size: 83 KiB |
Before Width: | Height: | Size: 79 KiB |
Before Width: | Height: | Size: 77 KiB |
Before Width: | Height: | Size: 79 KiB |
Before Width: | Height: | Size: 70 KiB |
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 42 KiB |
Before Width: | Height: | Size: 18 KiB |
Before Width: | Height: | Size: 60 KiB |
Before Width: | Height: | Size: 52 KiB |
Before Width: | Height: | Size: 59 KiB |
Before Width: | Height: | Size: 49 KiB |
Before Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 72 KiB |
Before Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 68 KiB |
Before Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 53 KiB |
Before Width: | Height: | Size: 50 KiB |
Before Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 55 KiB |
Before Width: | Height: | Size: 52 KiB |
Before Width: | Height: | Size: 75 KiB |
Before Width: | Height: | Size: 40 KiB |
Before Width: | Height: | Size: 37 KiB |
Before Width: | Height: | Size: 56 KiB |
Before Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 30 KiB |
Before Width: | Height: | Size: 22 KiB |
Before Width: | Height: | Size: 46 KiB |
Before Width: | Height: | Size: 56 KiB |
Before Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 56 KiB |
Before Width: | Height: | Size: 52 KiB |
Before Width: | Height: | Size: 50 KiB |
Before Width: | Height: | Size: 46 KiB |
Before Width: | Height: | Size: 50 KiB |
Before Width: | Height: | Size: 35 KiB |
Before Width: | Height: | Size: 21 KiB |
Before Width: | Height: | Size: 1.3 MiB |
Before Width: | Height: | Size: 5.0 KiB |
Before Width: | Height: | Size: 39 KiB |
@ -1,720 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-guide-architecture-overview">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Architecture</title>
|
|
||||||
<para>Hardware selection involves three key areas:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Compute</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Network</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Storage</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Hardware for a general purpose OpenStack cloud
|
|
||||||
should reflect a cloud with no pre-defined usage model,
|
|
||||||
designed to run a wide variety of applications with
|
|
||||||
varying resource usage requirements.
|
|
||||||
These applications include any of the following:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
RAM-intensive
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
CPU-intensive
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Storage-intensive
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Certain hardware form factors may better suit a general
|
|
||||||
purpose OpenStack cloud due to the requirement for equal (or
|
|
||||||
nearly equal) balance of resources. Server hardware must provide
|
|
||||||
the following:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Equal (or nearly equal) balance of compute capacity (RAM and CPU)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Network capacity (number and speed of links)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Storage capacity (gigabytes or terabytes as well as Input/Output
|
|
||||||
Operations Per Second (<glossterm>IOPS</glossterm>))
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Evaluate server hardware around four conflicting
|
|
||||||
dimensions:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Server density</term>
|
|
||||||
<listitem>
|
|
||||||
<para>A measure of how many servers can
|
|
||||||
fit into a given measure of physical space, such as a
|
|
||||||
rack unit [U].</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Resource capacity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The number of CPU cores, amount of RAM,
|
|
||||||
or amount of deliverable storage.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Expandability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Limit of additional resources you can add to
|
|
||||||
a server.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Cost</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The relative purchase price of the hardware
|
|
||||||
weighted against the level of design effort needed to
|
|
||||||
build the system.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>Increasing server density means sacrificing resource
|
|
||||||
capacity or expandability; however, increasing resource
|
|
||||||
capacity and expandability increases cost and decreases server
|
|
||||||
density. As a result, determining the best server hardware for
|
|
||||||
a general purpose OpenStack architecture means understanding
|
|
||||||
how choice of form factor will impact the rest of the
|
|
||||||
design. The following list outlines the form factors to
|
|
||||||
choose from:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Blade servers typically support dual-socket
|
|
||||||
multi-core CPUs. Blades also offer
|
|
||||||
outstanding density.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>1U rack-mounted servers occupy only a single rack
|
|
||||||
unit. Their benefits include high density, support for
|
|
||||||
dual-socket multi-core CPUs, and support for
|
|
||||||
reasonable RAM amounts. This form factor offers
|
|
||||||
limited storage capacity, limited network capacity,
|
|
||||||
and limited expandability.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>2U rack-mounted servers offer the expanded storage
|
|
||||||
and networking capacity that 1U servers tend to lack,
|
|
||||||
but with a corresponding decrease in server density
|
|
||||||
(half the density offered by 1U rack-mounted
|
|
||||||
servers).</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Larger rack-mounted servers, such as 4U servers,
|
|
||||||
will tend to offer even greater CPU capacity, often
|
|
||||||
supporting four or even eight CPU sockets. These
|
|
||||||
servers often have much greater expandability so will
|
|
||||||
provide the best option for upgradability. This means,
|
|
||||||
however, that the servers have a much lower server
|
|
||||||
density and a much greater hardware cost.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para><emphasis>Sled servers</emphasis> are rack-mounted servers that support
|
|
||||||
multiple independent servers in a single 2U or 3U
|
|
||||||
enclosure. This form factor offers increased density
|
|
||||||
over typical 1U-2U rack-mounted servers but tends to
|
|
||||||
suffer from limitations in the amount of storage or
|
|
||||||
network capacity each individual server
|
|
||||||
supports.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>The best form factor for server hardware
|
|
||||||
supporting a general purpose OpenStack cloud is driven by
|
|
||||||
outside business and cost factors. No single reference
|
|
||||||
architecture applies to all implementations; the decision
|
|
||||||
must flow from user requirements, technical
|
|
||||||
considerations, and operational considerations. Here are some
|
|
||||||
of the key factors that influence the selection of server
|
|
||||||
hardware:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Instance density</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Sizing is an important
|
|
||||||
consideration for a general purpose OpenStack cloud.
|
|
||||||
The expected or anticipated number of instances that
|
|
||||||
each hypervisor can host is a common metric used in
|
|
||||||
sizing the deployment. The selected server hardware
|
|
||||||
needs to support the expected or anticipated instance
|
|
||||||
density.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Host density</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Physical data centers have limited
|
|
||||||
physical space, power, and cooling. The number of
|
|
||||||
hosts (or hypervisors) that can be fitted into a given
|
|
||||||
metric (rack, rack unit, or floor tile) is another
|
|
||||||
important method of sizing. Floor weight is an often
|
|
||||||
overlooked consideration. The data center floor must
|
|
||||||
be able to support the weight of the proposed number
|
|
||||||
of hosts within a rack or set of racks. These factors
|
|
||||||
need to be applied as part of the host density
|
|
||||||
calculation and server hardware selection.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Power density</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Data centers have a specified amount
|
|
||||||
of power fed to a given rack or set of racks. Older
|
|
||||||
data centers may have a power density as low
|
|
||||||
as 20 AMPs per rack, while more recent data centers
|
|
||||||
can be architected to support power densities as high
|
|
||||||
as 120 AMPs per rack. The selected server hardware must
|
|
||||||
take power density into account.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Network connectivity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The selected server hardware
|
|
||||||
must have the appropriate number of network
|
|
||||||
connections, as well as the right type of network
|
|
||||||
connections, in order to support the proposed
|
|
||||||
architecture. Ensure that, at a minimum, there are at
|
|
||||||
least two diverse network connections coming into each
|
|
||||||
rack.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>The selection of form factors or architectures affects the selection
|
|
||||||
of server hardware. Ensure that the selected server hardware
|
|
||||||
is configured to support enough storage capacity (or storage
|
|
||||||
expandability) to match the requirements of the selected scale-out
|
|
||||||
storage solution. Similarly, the network architecture impacts
|
|
||||||
the server hardware selection and vice versa.</para>
|
|
||||||
|
|
||||||
<section xml:id="selecting-storage-hardware">
|
|
||||||
<title>Selecting storage hardware</title>
|
|
||||||
<para>Determine storage hardware architecture by
|
|
||||||
selecting specific storage architecture. Determine the selection of
|
|
||||||
storage architecture by evaluating possible solutions against the
|
|
||||||
critical factors, the user requirements, technical
|
|
||||||
considerations, and operational considerations.
|
|
||||||
Incorporate the following facts into your storage architecture:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Cost</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Storage can be a significant portion of the
|
|
||||||
overall system cost. For an organization that is concerned
|
|
||||||
with vendor support, a commercial storage solution is
|
|
||||||
advisable, although it comes with a higher price
|
|
||||||
tag. If initial capital expenditure requires
|
|
||||||
minimization, designing a system based on commodity
|
|
||||||
hardware would apply. The trade-off is potentially
|
|
||||||
higher support costs and a greater risk of
|
|
||||||
incompatibility and interoperability issues.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Scalability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Scalability, along with expandability, is a major
|
|
||||||
consideration in a general purpose OpenStack cloud. It
|
|
||||||
might be difficult to predict the final intended size
|
|
||||||
of the implementation as there are no established
|
|
||||||
usage patterns for a general purpose cloud. It might
|
|
||||||
become necessary to expand the initial deployment in
|
|
||||||
order to accommodate growth and user demand.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Expandability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Expandability is a major architecture factor for
|
|
||||||
storage solutions with general purpose OpenStack
|
|
||||||
cloud. A storage solution that expands
|
|
||||||
to 50 PB is considered more expandable than a
|
|
||||||
solution that only scales to 10 PB. This metric
|
|
||||||
is related to scalability, which is the measure of a
|
|
||||||
solution's performance as it expands.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>Using a scale-out storage solution with direct-attached
|
|
||||||
storage (DAS) in the servers is well suited for a general
|
|
||||||
purpose OpenStack cloud. Cloud services requirements determine
|
|
||||||
your choice of scale-out solution. You need to determine if
|
|
||||||
a single, highly expandable and highly vertical, scalable,
|
|
||||||
centralized storage array is suitable for your design.
|
|
||||||
After determining an approach, select the storage hardware
|
|
||||||
based on these criteria.</para>
|
|
||||||
<para>This list expands upon the potential impacts for including a
|
|
||||||
particular storage architecture (and corresponding storage
|
|
||||||
hardware) into the design for a general purpose OpenStack
|
|
||||||
cloud:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Connectivity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Ensure that, if storage protocols
|
|
||||||
other than Ethernet are part of the storage solution,
|
|
||||||
the appropriate hardware has been selected.
|
|
||||||
If a centralized storage array is selected, ensure
|
|
||||||
that the hypervisor will be able to connect to that
|
|
||||||
storage array for image storage.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Usage</term>
|
|
||||||
<listitem>
|
|
||||||
<para>How the particular storage architecture will
|
|
||||||
be used is critical for determining the architecture.
|
|
||||||
Some of the configurations that will influence the
|
|
||||||
architecture include whether it will be used by the
|
|
||||||
hypervisors for ephemeral instance storage or if
|
|
||||||
OpenStack Object Storage will use it for object storage.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Instance and image locations</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Where instances and images will be stored will influence
|
|
||||||
the architecture.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Server hardware</term>
|
|
||||||
<listitem>
|
|
||||||
<para>If the solution is a scale-out
|
|
||||||
storage architecture that includes DAS, it
|
|
||||||
will affect the server hardware selection. This could
|
|
||||||
ripple into the decisions that affect host density,
|
|
||||||
instance density, power density, OS-hypervisor,
|
|
||||||
management tools and others.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>General purpose OpenStack cloud has multiple options.
|
|
||||||
The key factors that will have an influence
|
|
||||||
on selection of storage hardware for a general purpose
|
|
||||||
OpenStack cloud are as follows:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Capacity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Hardware resources selected for the resource nodes
|
|
||||||
should be capable of supporting enough storage for the
|
|
||||||
cloud services. Defining the initial requirements and
|
|
||||||
ensuring the design can support adding capacity is
|
|
||||||
important. Hardware nodes selected for object storage
|
|
||||||
should be capable of supporting a large number of inexpensive
|
|
||||||
disks with no reliance on RAID controller cards.
|
|
||||||
Hardware nodes selected for block storage should be capable
|
|
||||||
of supporting high speed storage solutions and RAID controller
|
|
||||||
cards to provide performance and redundancy to storage at a
|
|
||||||
hardware level.
|
|
||||||
Selecting hardware RAID controllers that automatically repair
|
|
||||||
damaged arrays will assist with the replacement and repair of
|
|
||||||
degraded or deleted storage devices.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Performance</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Disks selected for object storage services do not need
|
|
||||||
to be fast performing disks. We recommend that object storage
|
|
||||||
nodes take advantage of the best cost per terabyte available
|
|
||||||
for storage. Contrastingly, disks chosen for block storage
|
|
||||||
services should take advantage of performance boosting
|
|
||||||
features that may entail the use of SSDs or flash storage
|
|
||||||
to provide high performance block storage pools. Storage
|
|
||||||
performance of ephemeral disks used for instances should
|
|
||||||
also be taken into consideration.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Fault tolerance</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Object storage resource nodes have
|
|
||||||
no requirements for hardware fault tolerance or RAID
|
|
||||||
controllers. It is not necessary to plan for fault
|
|
||||||
tolerance within the object storage hardware because
|
|
||||||
the object storage service provides replication
|
|
||||||
between zones as a feature of the service. Block
|
|
||||||
storage nodes, compute nodes, and cloud controllers
|
|
||||||
should all have fault tolerance built in at the
|
|
||||||
hardware level by making use of hardware RAID
|
|
||||||
controllers and varying levels of RAID configuration.
|
|
||||||
The level of RAID chosen should be consistent with the
|
|
||||||
performance and availability requirements of the
|
|
||||||
cloud.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="selecting-networking-hardware">
|
|
||||||
<title>Selecting networking hardware</title>
|
|
||||||
<para>Selecting network architecture determines which network
|
|
||||||
hardware will be used. Networking software is determined by
|
|
||||||
the selected networking hardware.</para>
|
|
||||||
<para>There are more subtle design impacts that need to be considered.
|
|
||||||
The selection of certain networking hardware (and the networking software)
|
|
||||||
affects the management tools that can be used. There are
|
|
||||||
exceptions to this; the rise of <emphasis>open</emphasis> networking software
|
|
||||||
that supports a range of networking hardware means that there
|
|
||||||
are instances where the relationship between networking
|
|
||||||
hardware and networking software is not as tightly defined.</para>
|
|
||||||
<para>Some of the key considerations that should be included in
|
|
||||||
the selection of networking hardware include:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Port count</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The design will require networking
|
|
||||||
hardware that has the requisite port count.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Port density</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The network design will be affected by
|
|
||||||
the physical space that is required to provide the
|
|
||||||
requisite port count. A higher port density is preferred,
|
|
||||||
as it leaves more rack space for compute or storage components
|
|
||||||
that may be required by the design. This can also lead into
|
|
||||||
concerns about fault domains and power density that
|
|
||||||
should be considered. Higher density switches are more
|
|
||||||
expensive and should also be considered, as it is
|
|
||||||
important not to over design the network if it is not
|
|
||||||
required.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Port speed</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
The networking hardware must support the proposed
|
|
||||||
network speed, for example: 1 GbE, 10 GbE, or
|
|
||||||
40 GbE (or even 100 GbE).</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Redundancy</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The level of network hardware redundancy
|
|
||||||
required is influenced by the user requirements for
|
|
||||||
high availability and cost considerations. Network
|
|
||||||
redundancy can be achieved by adding redundant power
|
|
||||||
supplies or paired switches. If this is a requirement,
|
|
||||||
the hardware will need to support this configuration.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Power requirements</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Ensure that the physical data
|
|
||||||
center provides the necessary power for the selected
|
|
||||||
network hardware.</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
This may be an issue for spine switches in a leaf and
|
|
||||||
spine fabric, or end of row (EoR) switches.</para>
|
|
||||||
</note>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>There is no single best practice architecture for the
|
|
||||||
networking hardware supporting a general purpose OpenStack
|
|
||||||
cloud that will apply to all implementations. Some of the key
|
|
||||||
factors that will have a strong influence on selection of
|
|
||||||
networking hardware include:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Connectivity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>All nodes within an OpenStack cloud
|
|
||||||
require network connectivity. In some
|
|
||||||
cases, nodes require access to more than one network
|
|
||||||
segment. The design must encompass sufficient network
|
|
||||||
capacity and bandwidth to ensure that all
|
|
||||||
communications within the cloud, both north-south and
|
|
||||||
east-west traffic, have sufficient resources
|
|
||||||
available.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Scalability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The network design should
|
|
||||||
encompass a physical and logical network design that
|
|
||||||
can be easily expanded upon. Network hardware should
|
|
||||||
offer the appropriate types of interfaces and speeds
|
|
||||||
that are required by the hardware nodes.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Availability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>To ensure that access to nodes within
|
|
||||||
the cloud is not interrupted, we recommend that
|
|
||||||
the network architecture identify any single points of
|
|
||||||
failure and provide some level of redundancy or fault
|
|
||||||
tolerance. With regard to the network infrastructure
|
|
||||||
itself, this often involves use of networking
|
|
||||||
protocols such as LACP, VRRP or others to achieve a
|
|
||||||
highly available network connection. In addition, it
|
|
||||||
is important to consider the networking implications
|
|
||||||
on API availability. In order to ensure that the APIs,
|
|
||||||
and potentially other services in the cloud are highly
|
|
||||||
available, we recommend you design a load balancing
|
|
||||||
solution within the network architecture to
|
|
||||||
accommodate these requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="software-selection">
|
|
||||||
<title>Software selection</title>
|
|
||||||
<para>Software selection for a general purpose OpenStack
|
|
||||||
architecture design needs to include these three areas:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Operating system (OS) and hypervisor</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack components</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Supplemental software</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="os-and-hypervisor">
|
|
||||||
<title>Operating system and hypervisor</title>
|
|
||||||
<para>The operating system (OS) and hypervisor have a
|
|
||||||
significant impact on the overall design. Selecting a particular
|
|
||||||
operating system and hypervisor can directly affect server
|
|
||||||
hardware selection. Make sure the storage
|
|
||||||
hardware and topology support the selected operating
|
|
||||||
system and hypervisor combination. Also ensure the networking
|
|
||||||
hardware selection and topology will work with the chosen operating
|
|
||||||
system and hypervisor combination.</para>
|
|
||||||
<para>Some areas that could be impacted by the selection of OS and
|
|
||||||
hypervisor include:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Cost</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Selecting a commercially supported hypervisor,
|
|
||||||
such as Microsoft Hyper-V, will result in a different
|
|
||||||
cost model than community-supported open source
|
|
||||||
hypervisors including <glossterm
|
|
||||||
baseform="kernel-based VM (KVM)">KVM</glossterm>,
|
|
||||||
or <glossterm>Xen</glossterm>. When
|
|
||||||
comparing open source OS solutions, choosing Ubuntu
|
|
||||||
over Red Hat (or vice versa) will have an impact on
|
|
||||||
cost due to support contracts.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Supportability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Depending on the selected
|
|
||||||
hypervisor, staff should have the appropriate
|
|
||||||
training and knowledge to support the selected OS and
|
|
||||||
hypervisor combination. If they do not, training will
|
|
||||||
need to be provided which could have a cost impact on
|
|
||||||
the design.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Management tools</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The management tools used for
|
|
||||||
Ubuntu and KVM differ from the management tools
|
|
||||||
for VMware vSphere. Although both OS and hypervisor
|
|
||||||
combinations are supported by OpenStack, there will be
|
|
||||||
very different impacts to the rest of the design as a
|
|
||||||
result of the selection of one combination versus the
|
|
||||||
other.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Scale and performance</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Ensure that selected OS and
|
|
||||||
hypervisor combinations meet the appropriate scale and
|
|
||||||
performance requirements. The chosen architecture will
|
|
||||||
need to meet the targeted instance-host ratios with
|
|
||||||
the selected OS-hypervisor combinations.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Security</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Ensure that the design can accommodate
|
|
||||||
regular periodic installations of application security
|
|
||||||
patches while maintaining required workloads. The
|
|
||||||
frequency of security patches for the proposed
|
|
||||||
OS-hypervisor combination will have an impact on
|
|
||||||
performance and the patch installation process could
|
|
||||||
affect maintenance windows.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Supported features</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Determine which features of OpenStack are required.
|
|
||||||
This will often determine the selection of the OS-hypervisor combination.
|
|
||||||
Some features are only available with specific operating systems or
|
|
||||||
hypervisors.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Interoperability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>You will need to consider how the OS and hypervisor combination
|
|
||||||
interacts with other operating systems and hypervisors, including
|
|
||||||
other software solutions.
|
|
||||||
Operational troubleshooting tools for one OS-hypervisor
|
|
||||||
combination may differ from the tools used for another OS-hypervisor
|
|
||||||
combination and, as a result, the design will need to
|
|
||||||
address if the two sets of tools need to interoperate.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="openstack-components">
|
|
||||||
<title>OpenStack components</title>
|
|
||||||
<para>Selecting which OpenStack components are included in the overall
|
|
||||||
design is important. Some OpenStack components, like
|
|
||||||
compute and Image service, are required in every architecture. Other
|
|
||||||
components, like Orchestration, are not always required.</para>
|
|
||||||
<para>Excluding certain OpenStack components can limit or constrain
|
|
||||||
the functionality of other components. For example, if the architecture includes
|
|
||||||
Orchestration but excludes Telemetry, then the design will not be able
|
|
||||||
to take advantage of Orchestration's auto scaling functionality.
|
|
||||||
It is important to research the component interdependencies
|
|
||||||
in conjunction with the technical requirements before deciding
|
|
||||||
on the final architecture.</para>
|
|
||||||
|
|
||||||
<section xml:id="networking-software">
|
|
||||||
<title>Networking software</title>
|
|
||||||
<para>OpenStack Networking (neutron) provides a wide variety of networking
|
|
||||||
services for instances. There are many additional networking
|
|
||||||
software packages that can be useful when managing OpenStack
|
|
||||||
components. Some examples include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Software to provide load balancing
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Network redundancy protocols
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Routing daemons
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Some of these software packages are described
|
|
||||||
in more detail in the <citetitle>OpenStack High Availability
|
|
||||||
Guide</citetitle> (refer to the <link
|
|
||||||
xlink:href="http://docs.openstack.org/ha-guide/networking-ha.html">Network
|
|
||||||
controller cluster stack chapter</link> of the OpenStack High
|
|
||||||
Availability Guide).</para>
|
|
||||||
<para>For a general purpose OpenStack cloud, the OpenStack
|
|
||||||
infrastructure components need to be highly available. If
|
|
||||||
the design does not include hardware load balancing,
|
|
||||||
networking software packages like HAProxy will need to be
|
|
||||||
included.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="management-software">
|
|
||||||
<title>Management software</title>
|
|
||||||
<para>Selected supplemental software solution impacts and
|
|
||||||
affects the overall OpenStack cloud design. This includes
|
|
||||||
software for providing clustering, logging, monitoring and
|
|
||||||
alerting.</para>
|
|
||||||
<para>Inclusion of clustering software, such as Corosync or
|
|
||||||
Pacemaker, is determined primarily by the availability
|
|
||||||
requirements. The impact of including (or not
|
|
||||||
including) these software packages is primarily determined by
|
|
||||||
the availability of the cloud infrastructure and the
|
|
||||||
complexity of supporting the configuration after it is
|
|
||||||
deployed. The <link xlink:href="http://docs.openstack.org/ha-guide/"><citetitle>OpenStack High Availability Guide</citetitle></link>
|
|
||||||
provides more
|
|
||||||
details on the installation and configuration of Corosync and
|
|
||||||
Pacemaker, should these packages need to be included in the
|
|
||||||
design.</para>
|
|
||||||
<para>Requirements for logging, monitoring, and alerting are
|
|
||||||
determined by operational considerations. Each of these
|
|
||||||
sub-categories includes a number of various options.</para>
|
|
||||||
<para>If these software packages are required, the
|
|
||||||
design must account for the additional resource consumption
|
|
||||||
(CPU, RAM, storage, and network bandwidth). Some other potential
|
|
||||||
design impacts include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>OS-hypervisor combination: Ensure that the
|
|
||||||
selected logging, monitoring, or alerting tools
|
|
||||||
support the proposed OS-hypervisor combination.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Network hardware: The network hardware selection
|
|
||||||
needs to be supported by the logging, monitoring, and
|
|
||||||
alerting software.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="database-software">
|
|
||||||
<title>Database software</title>
|
|
||||||
<para>OpenStack components often require access
|
|
||||||
to back-end database services to store state and configuration
|
|
||||||
information. Selecting an appropriate back-end database
|
|
||||||
that satisfies the availability and fault tolerance
|
|
||||||
requirements of the OpenStack services is required. OpenStack
|
|
||||||
services support connecting to a database that is supported
|
|
||||||
by the SQLAlchemy Python drivers; however, most common
|
|
||||||
database deployments make use of MySQL or variations of it. We
|
|
||||||
recommend that the database, which provides back-end
|
|
||||||
service within a general purpose cloud, be made highly
|
|
||||||
available when using an available technology which can
|
|
||||||
accomplish that goal.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,156 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="operational-considerations-general-purpose">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Operational considerations</title>
|
|
||||||
<para>In the planning and design phases of the build out, it is
|
|
||||||
important to include the operations function. Operational
|
|
||||||
factors affect the design choices for a general purpose cloud,
|
|
||||||
and operations staff are often tasked with the maintenance of
|
|
||||||
cloud environments for larger installations.</para>
|
|
||||||
<para>Expectations set by the Service Level Agreements (SLAs) directly
|
|
||||||
affect knowing when and where you should implement redundancy and
|
|
||||||
high availability. SLAs are contractual
|
|
||||||
obligations that provide assurances for service availability.
|
|
||||||
They define the levels of availability that drive the technical
|
|
||||||
design, often with penalties for not meeting contractual obligations.</para>
|
|
||||||
<para>SLA terms that affect design include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>API availability guarantees implying multiple
|
|
||||||
infrastructure services and highly available
|
|
||||||
load balancers.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Network uptime guarantees affecting switch
|
|
||||||
design, which might require redundant switching and
|
|
||||||
power.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Factor in networking security policy requirements
|
|
||||||
into your deployments.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<section xml:id="support-and-maintainability-general-purpose">
|
|
||||||
<title>Support and maintainability</title>
|
|
||||||
<para>To be able to support and maintain an installation, OpenStack
|
|
||||||
cloud management requires operations staff to understand and
|
|
||||||
comprehend design architecture content. The operations and engineering
|
|
||||||
staff skill level, and level of separation, are dependent on size and
|
|
||||||
purpose of the installation. Large cloud service providers, or telecom
|
|
||||||
providers, are more likely to be managed by specially trained, dedicated
|
|
||||||
operations organizations. Smaller implementations are more likely to rely
|
|
||||||
on support staff that need to take on combined engineering, design and
|
|
||||||
operations functions.</para>
|
|
||||||
<para>Maintaining OpenStack installations requires a
|
|
||||||
variety of technical skills. You may want to consider using a third-party
|
|
||||||
management company with special expertise in managing
|
|
||||||
OpenStack deployments.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="monitoring-general-purpose">
|
|
||||||
<title>Monitoring</title>
|
|
||||||
<para>OpenStack clouds require appropriate monitoring platforms to
|
|
||||||
ensure errors are caught and managed appropriately. Specific
|
|
||||||
meters that are critically important to monitor include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Image disk utilization
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Response time to the Compute API
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Leveraging existing monitoring systems is an effective check to
|
|
||||||
ensure OpenStack environments can be monitored.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="downtime-general-purpose">
|
|
||||||
<title>Downtime</title>
|
|
||||||
<para>To effectively run cloud installations, initial downtime planning
|
|
||||||
includes creating processes and architectures that support
|
|
||||||
the following:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Planned (maintenance)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Unplanned (system faults)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>The resiliency of the overall system and individual components is going
|
|
||||||
to be dictated by the requirements of the SLA, meaning designing
|
|
||||||
for high availability (HA) can have cost ramifications.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="capacity-planning">
|
|
||||||
<title>Capacity planning</title>
|
|
||||||
<para>Capacity constraints for a general purpose cloud environment
|
|
||||||
include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Compute limits
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Storage limits
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>A relationship exists between the size of the compute environment
|
|
||||||
and the supporting OpenStack infrastructure controller nodes requiring
|
|
||||||
support.</para>
|
|
||||||
<para>Increasing the size of the supporting compute environment increases
|
|
||||||
the network traffic and messages, adding load to the controller or
|
|
||||||
networking nodes. Effective monitoring of the environment will help
|
|
||||||
with capacity decisions on scaling.</para>
|
|
||||||
<para>Compute nodes automatically attach to OpenStack clouds, resulting in
|
|
||||||
a horizontally scaling process when adding extra compute capacity to an
|
|
||||||
OpenStack cloud. Additional processes are required to place nodes into
|
|
||||||
appropriate availability zones and host aggregates. When adding additional
|
|
||||||
compute nodes to environments, ensure identical or functionally compatible
|
|
||||||
CPUs are used, otherwise live migration features will break. It is necessary
|
|
||||||
to add rack capacity or network switches as scaling out compute hosts directly
|
|
||||||
affects network and datacenter resources.</para>
|
|
||||||
<para>Assessing the average workloads and increasing the number of instances
|
|
||||||
that can run within the compute environment by adjusting the overcommit
|
|
||||||
ratio is another option. It is important to remember that changing the CPU overcommit
|
|
||||||
ratio can have a detrimental effect and cause a potential increase in a
|
|
||||||
noisy neighbor. The additional risk of increasing the overcommit ratio is
|
|
||||||
more instances failing when a compute host fails.</para>
|
|
||||||
<para>Compute host components can also be upgraded to account for
|
|
||||||
increases in demand; this is known as vertical scaling.
|
|
||||||
Upgrading CPUs with more cores, or increasing the overall
|
|
||||||
server memory, can add extra needed capacity depending on
|
|
||||||
whether the running applications are more CPU intensive or
|
|
||||||
memory intensive.</para>
|
|
||||||
<para>Insufficient disk capacity could also have a negative effect
|
|
||||||
on overall performance including CPU and memory usage.
|
|
||||||
Depending on the back-end architecture of the OpenStack Block
|
|
||||||
Storage layer, capacity includes adding disk shelves to
|
|
||||||
enterprise storage systems or installing additional block
|
|
||||||
storage nodes. Upgrading directly attached storage installed in
|
|
||||||
compute hosts, and adding capacity to the shared storage for
|
|
||||||
additional ephemeral storage to instances, may be necessary.</para>
|
|
||||||
<para>
|
|
||||||
For a deeper discussion on many of these topics, refer to the
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/ops"><citetitle>OpenStack
|
|
||||||
Operations Guide</citetitle></link>.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,101 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="prescriptive-example-online-classifieds">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Prescriptive example</title>
|
|
||||||
<para>An online classified advertising company wants to run web applications
|
|
||||||
consisting of Tomcat, Nginx and MariaDB in a private cloud. To be able
|
|
||||||
to meet policy requirements, the cloud infrastructure will run in their
|
|
||||||
own data center. The company has predictable load requirements, but requires
|
|
||||||
scaling to cope with nightly increases in demand. Their current environment
|
|
||||||
does not have the flexibility to align with their goal of running an open
|
|
||||||
source API environment. The current environment consists of the following:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Between 120 and 140 installations of Nginx and
|
|
||||||
Tomcat, each with 2 vCPUs and 4 GB of RAM</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A three-node MariaDB and Galera cluster, each with 4
|
|
||||||
vCPUs and 8 GB RAM</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>The company runs hardware load balancers and multiple web
|
|
||||||
applications serving their websites, and orchestrates environments
|
|
||||||
using combinations of scripts and Puppet. The website generates large amounts of
|
|
||||||
log data daily that requires archiving.</para>
|
|
||||||
<para>The solution would consist of the following OpenStack
|
|
||||||
components:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>A firewall, switches and load balancers on the
|
|
||||||
public facing network connections.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Controller service running Image,
|
|
||||||
Identity, Networking, combined with support services such as
|
|
||||||
MariaDB and RabbitMQ, configured for high availability on at
|
|
||||||
least three controller nodes.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Compute nodes running the KVM
|
|
||||||
hypervisor.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Block Storage for use by compute instances,
|
|
||||||
requiring persistent storage (such as databases for
|
|
||||||
dynamic sites).</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Object Storage for serving static objects
|
|
||||||
(such as images).</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<mediaobject><imageobject><imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/General_Architecture3.png"
|
|
||||||
/></imageobject></mediaobject>
|
|
||||||
<para>Running up to 140
|
|
||||||
web instances and the small number of MariaDB instances
|
|
||||||
requires 292 vCPUs available, as well as 584 GB RAM. On a
|
|
||||||
typical 1U server using dual-socket hex-core Intel CPUs with
|
|
||||||
Hyperthreading, and assuming 2:1 CPU overcommit ratio, this
|
|
||||||
would require 8 OpenStack Compute nodes.</para>
|
|
||||||
<para>The web application instances run from local storage on each
|
|
||||||
of the OpenStack Compute nodes. The web application instances
|
|
||||||
are stateless, meaning that any of the instances can fail and
|
|
||||||
the application will continue to function.</para>
|
|
||||||
<para>MariaDB server instances store their data on shared
|
|
||||||
enterprise storage, such as NetApp or Solidfire devices. If a
|
|
||||||
MariaDB instance fails, storage would be expected to be
|
|
||||||
re-attached to another instance and rejoined to the Galera
|
|
||||||
cluster.</para>
|
|
||||||
<para>Logs from the web application servers are shipped to
|
|
||||||
OpenStack Object Storage for processing and
|
|
||||||
archiving.</para>
|
|
||||||
<para>Additional capabilities can be realized by
|
|
||||||
moving static web content to be served from OpenStack Object
|
|
||||||
Storage containers, and backing the OpenStack Image service
|
|
||||||
with OpenStack Object Storage.</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
Increasing OpenStack Object Storage means network bandwidth
|
|
||||||
needs to be taken into consideration. Running OpenStack Object
|
|
||||||
Storage with network connections offering 10 GbE or better connectivity
|
|
||||||
is advised.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<para>Leveraging Orchestration and Telemetry services is also a potential issue when
|
|
||||||
providing auto-scaling, orchestrated web application environments.
|
|
||||||
Defining the web applications in <glossterm
|
|
||||||
baseform="Heat Orchestration Template (HOT)">Heat Orchestration Templates (HOT)</glossterm>
|
|
||||||
negates the reliance on the current scripted Puppet solution.</para>
|
|
||||||
<para>OpenStack Networking can be used to control hardware load
|
|
||||||
balancers through the use of plug-ins and the Networking API.
|
|
||||||
This allows users to control hardware load balance pools
|
|
||||||
and instances as members in these pools, but their use in
|
|
||||||
production environments must be carefully weighed against
|
|
||||||
current stability.</para>
|
|
||||||
</section>
|
|
@ -1,738 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="technical-considerations-general-purpose">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Technical considerations</title>
|
|
||||||
<para>General purpose clouds are expected to
|
|
||||||
include these base services:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Compute
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Network
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Storage
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Each of these services has different resource requirements.
|
|
||||||
As a result, you must make design decisions relating directly
|
|
||||||
to the service, as well as provide a balanced infrastructure for
|
|
||||||
all services.</para>
|
|
||||||
<para>Take into consideration the unique aspects of each service, as
|
|
||||||
individual characteristics and service mass can impact the hardware
|
|
||||||
selection process. Hardware designs should be generated for each of the
|
|
||||||
services.</para>
|
|
||||||
<para>Hardware decisions are also made in relation to network architecture
|
|
||||||
and facilities planning. These factors play heavily into
|
|
||||||
the overall architecture of an OpenStack cloud.</para>
|
|
||||||
|
|
||||||
<section xml:id="designing-compute-resources-tech-considerations">
|
|
||||||
<title>Compute resource design</title>
|
|
||||||
<para>When designing compute resource pools, a number of factors
|
|
||||||
can impact your design decisions. Factors such as number of processors,
|
|
||||||
amount of memory, and the quantity of storage required for each hypervisor
|
|
||||||
must be taken into account.</para>
|
|
||||||
<para>You will also need to decide whether to provide compute resources
|
|
||||||
in a single pool or in multiple pools. In most cases, multiple pools
|
|
||||||
of resources can be allocated and addressed on demand. A compute design
|
|
||||||
that allocates multiple pools of resources makes best use of application
|
|
||||||
resources, and is commonly referred to as
|
|
||||||
<firstterm>bin packing</firstterm>.</para>
|
|
||||||
<para>In a bin packing design, each independent resource pool provides service
|
|
||||||
for specific flavors. This helps to ensure that, as instances are scheduled
|
|
||||||
onto compute hypervisors, each independent node's resources will be allocated
|
|
||||||
in a way that makes the most efficient use of the available hardware. Bin
|
|
||||||
packing also requires a common hardware design, with all hardware nodes within
|
|
||||||
a compute resource pool sharing a common processor, memory, and storage layout.
|
|
||||||
This makes it easier to deploy, support, and maintain nodes throughout their
|
|
||||||
life cycle.</para>
|
|
||||||
<para>An <firstterm>overcommit ratio</firstterm> is the ratio of available
|
|
||||||
virtual resources to available physical resources. This ratio is
|
|
||||||
configurable for CPU and memory. The default CPU overcommit ratio is 16:1, and
|
|
||||||
the default memory overcommit ratio is 1.5:1. Determining the tuning of the
|
|
||||||
overcommit ratios during the design phase is important as it has a direct
|
|
||||||
impact on the hardware layout of your compute nodes.</para>
|
|
||||||
<para>When selecting a processor, compare features and performance
|
|
||||||
characteristics. Some processors include features specific to virtualized
|
|
||||||
compute hosts, such as hardware-assisted virtualization, and technology
|
|
||||||
related to memory paging (also known as EPT shadowing). These types of features
|
|
||||||
can have a significant impact on the performance of your virtual machine.</para>
|
|
||||||
<para>You will also need to consider the compute requirements of non-hypervisor
|
|
||||||
nodes (sometimes referred to as resource nodes). This includes controller, object
|
|
||||||
storage, and block storage nodes, and networking services.</para>
|
|
||||||
<para>The number of processor cores and threads impacts the number of worker
|
|
||||||
threads which can be run on a resource node. Design decisions must relate
|
|
||||||
directly to the service being run on it, as well as provide a balanced
|
|
||||||
infrastructure for all services.</para>
|
|
||||||
<para>Workload can be unpredictable in a general purpose cloud, so consider
|
|
||||||
including the ability to add additional compute resource pools on demand.
|
|
||||||
In some cases, however, the demand for certain instance types or flavors may not
|
|
||||||
justify individual hardware design. In either case, start by allocating
|
|
||||||
hardware designs that are capable of servicing the most common instance
|
|
||||||
requests. If you want to add additional hardware to the overall architecture,
|
|
||||||
this can be done later.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="designing-network-resources-tech-considerations">
|
|
||||||
<title>Designing network resources</title>
|
|
||||||
<para>OpenStack clouds generally have multiple network segments, with
|
|
||||||
each segment providing access to particular resources. The network services
|
|
||||||
themselves also require network communication paths which should
|
|
||||||
be separated from the other networks. When designing network services
|
|
||||||
for a general purpose cloud, plan for either a physical or logical
|
|
||||||
separation of network segments used by operators and tenants. You can also
|
|
||||||
create an additional network segment for access to internal services such as
|
|
||||||
the message bus and database used by various services. Segregating these
|
|
||||||
services onto separate networks helps to protect sensitive data and protects
|
|
||||||
against unauthorized access to services.</para>
|
|
||||||
<para>Choose a networking service based on the requirements of your instances.
|
|
||||||
The architecture and design of your cloud will impact whether you choose
|
|
||||||
OpenStack Networking (neutron) or legacy networking (nova-network).</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Legacy networking (nova-network)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The legacy networking (nova-network) service is primarily a
|
|
||||||
layer-2 networking service that functions in two modes, which
|
|
||||||
use VLANs in different ways. In a flat network mode, all
|
|
||||||
network hardware nodes and devices throughout the cloud are connected
|
|
||||||
to a single layer-2 network segment that provides access to
|
|
||||||
application data.</para>
|
|
||||||
<para>When the network devices in the cloud support segmentation
|
|
||||||
using VLANs, legacy networking can operate in the second mode. In
|
|
||||||
this design model, each tenant within the cloud is assigned a
|
|
||||||
network subnet which is mapped to a VLAN on the physical
|
|
||||||
network. It is especially important to remember the maximum
|
|
||||||
number of 4096 VLANs which can be used within a spanning tree
|
|
||||||
domain. This places a hard limit on the amount of
|
|
||||||
growth possible within the data center. When designing a
|
|
||||||
general purpose cloud intended to support multiple tenants, we
|
|
||||||
recommend the use of legacy networking with VLANs, and
|
|
||||||
not in flat network mode.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>Another consideration regarding network is the fact that
|
|
||||||
legacy networking is entirely managed by the cloud operator;
|
|
||||||
tenants do not have control over network resources. If tenants
|
|
||||||
require the ability to manage and create network resources
|
|
||||||
such as network segments and subnets, it will be necessary to
|
|
||||||
install the OpenStack Networking service to provide network
|
|
||||||
access to instances.</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>OpenStack Networking (neutron)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Networking (neutron) is a first class networking
|
|
||||||
service that gives full control over creation of virtual
|
|
||||||
network resources to tenants. This is often accomplished in
|
|
||||||
the form of tunneling protocols which will establish
|
|
||||||
encapsulated communication paths over existing network
|
|
||||||
infrastructure in order to segment tenant traffic. These
|
|
||||||
methods vary depending on the specific implementation, but
|
|
||||||
some of the more common methods include tunneling over GRE,
|
|
||||||
encapsulating with VXLAN, and VLAN tags.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>We recommend you design at least three network segments:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>The first segment is a public network, used for access to REST APIs
|
|
||||||
by tenants and operators. The controller nodes and swift
|
|
||||||
proxies are the only devices connecting to this network segment. In some
|
|
||||||
cases, this network might also be serviced by hardware load balancers
|
|
||||||
and other network devices.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The second segment is used by administrators to manage hardware resources.
|
|
||||||
Configuration management tools also use this for deploying software and
|
|
||||||
services onto new hardware. In some cases, this network segment might also be
|
|
||||||
used for internal services, including the message bus and database services.
|
|
||||||
This network needs to communicate with every hardware node.
|
|
||||||
Due to the highly sensitive nature of this network segment, you also need to
|
|
||||||
secure this network from unauthorized access.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The third network segment is used by applications and consumers to access
|
|
||||||
the physical network, and for users to access applications. This network is
|
|
||||||
segregated from the one used to access the cloud APIs and is not
|
|
||||||
capable of communicating directly with the hardware resources in the cloud.
|
|
||||||
Compute resource nodes and network gateway services which allow application
|
|
||||||
data to access the physical network from outside of the cloud need to
|
|
||||||
communicate on this network segment.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="designing-openstack-object-storage-tech-considerations">
|
|
||||||
<title>Designing OpenStack Object Storage</title>
|
|
||||||
<para>When designing hardware resources for OpenStack Object
|
|
||||||
Storage, the primary goal is to maximize the amount of storage
|
|
||||||
in each resource node while also ensuring that the cost per
|
|
||||||
terabyte is kept to a minimum. This often involves utilizing
|
|
||||||
servers which can hold a large number of spinning disks.
|
|
||||||
Whether choosing to use 2U server form factors with directly
|
|
||||||
attached storage or an external chassis that holds a larger
|
|
||||||
number of drives, the main goal is to maximize the storage
|
|
||||||
available in each node.</para>
|
|
||||||
<note>
|
|
||||||
<para>We do not recommend investing in enterprise class drives
|
|
||||||
for an OpenStack Object Storage cluster. The consistency and
|
|
||||||
partition tolerance characteristics of OpenStack Object
|
|
||||||
Storage ensure that data stays up to date and survives
|
|
||||||
hardware faults without the use of any specialized data
|
|
||||||
replication devices.</para>
|
|
||||||
</note>
|
|
||||||
<para>One of the benefits of OpenStack Object Storage is the ability
|
|
||||||
to mix and match drives by making use of weighting within the
|
|
||||||
swift ring. When designing your swift storage cluster, we
|
|
||||||
recommend making use of the most cost effective storage
|
|
||||||
solution available at the time.</para>
|
|
||||||
<para>To achieve durability and availability of data stored as objects
|
|
||||||
it is important to design object storage resource pools to ensure they can
|
|
||||||
provide the suggested availability. Considering rack-level and zone-level
|
|
||||||
designs to accommodate the number of replicas configured to be stored in the
|
|
||||||
Object Storage service (the default number of replicas is three) is important
|
|
||||||
when designing beyond the hardware node level. Each replica of
|
|
||||||
data should exist in its own availability zone with its own
|
|
||||||
power, cooling, and network resources available to service
|
|
||||||
that specific zone.</para>
|
|
||||||
<para>Object storage nodes should be designed so that the number
|
|
||||||
of requests does not hinder the performance of the cluster.
|
|
||||||
The object storage service is a chatty protocol, therefore
|
|
||||||
making use of multiple processors that have higher core counts
|
|
||||||
will ensure the IO requests do not inundate the server.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="designing-openstack-block-storage">
|
|
||||||
<title>Designing OpenStack Block Storage</title>
|
|
||||||
<para>When designing OpenStack Block Storage resource nodes, it is
|
|
||||||
helpful to understand the workloads and requirements that will
|
|
||||||
drive the use of block storage in the cloud. We recommend designing
|
|
||||||
block storage pools so that tenants can choose appropriate storage
|
|
||||||
solutions for their applications. By creating multiple storage pools of different
|
|
||||||
types, in conjunction with configuring an advanced storage
|
|
||||||
scheduler for the block storage service, it is possible to
|
|
||||||
provide tenants with a large catalog of storage services with
|
|
||||||
a variety of performance levels and redundancy options.</para>
|
|
||||||
<para>Block storage also takes advantage of a number of enterprise storage
|
|
||||||
solutions. These are addressed via a plug-in driver developed by the
|
|
||||||
hardware vendor. A large number of
|
|
||||||
enterprise storage plug-in drivers ship out-of-the-box with
|
|
||||||
OpenStack Block Storage (and many more available via third
|
|
||||||
party channels). General purpose clouds are more likely to use
|
|
||||||
directly attached storage in the majority of block storage nodes,
|
|
||||||
deeming it necessary to provide additional levels of service to tenants
|
|
||||||
which can only be provided by enterprise class storage solutions.</para>
|
|
||||||
<para>Redundancy and availability requirements impact the decision to use
|
|
||||||
a RAID controller card in block storage nodes. The input-output per second (IOPS)
|
|
||||||
demand of your application will influence whether or not you should use a RAID
|
|
||||||
controller, and which level of RAID is required.
|
|
||||||
Making use of higher performing RAID volumes is suggested when
|
|
||||||
considering performance. However, where redundancy of
|
|
||||||
block storage volumes is more important we recommend
|
|
||||||
making use of a redundant RAID configuration such as RAID 5 or
|
|
||||||
RAID 6. Some specialized features, such as automated
|
|
||||||
replication of block storage volumes, may require the use of
|
|
||||||
third-party plug-ins and enterprise block storage solutions in
|
|
||||||
order to provide the high demand on storage. Furthermore,
|
|
||||||
where extreme performance is a requirement it may also be
|
|
||||||
necessary to make use of high speed SSD disk drives or high
|
|
||||||
performing flash storage solutions.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="software-selection-tech-considerations">
|
|
||||||
<title>Software selection</title>
|
|
||||||
<para>The software selection process plays a large role in the
|
|
||||||
architecture of a general purpose cloud. The following have
|
|
||||||
a large impact on the design of the cloud:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Choice of operating system
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Selection of OpenStack software components
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Choice of hypervisor
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Selection of supplemental software
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Operating system (OS) selection plays a large role in the
|
|
||||||
design and architecture of a cloud. There are a number of OSes
|
|
||||||
which have native support for OpenStack including:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Ubuntu
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Red Hat Enterprise Linux (RHEL)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
CentOS
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
SUSE Linux Enterprise Server (SLES)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<note>
|
|
||||||
<para>Native support is not a constraint on the choice of OS; users are
|
|
||||||
free to choose just about any Linux distribution (or even
|
|
||||||
Microsoft Windows) and install OpenStack directly from source
|
|
||||||
(or compile their own packages). However, many organizations will
|
|
||||||
prefer to install OpenStack from distribution-supplied packages or
|
|
||||||
repositories (although using the distribution vendor's OpenStack
|
|
||||||
packages might be a requirement for support).
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<para>OS selection also directly influences hypervisor selection.
|
|
||||||
A cloud architect who selects Ubuntu, RHEL, or SLES has some
|
|
||||||
flexibility in hypervisor; KVM, Xen, and LXC are supported
|
|
||||||
virtualization methods available under OpenStack Compute
|
|
||||||
(nova) on these Linux distributions. However, a cloud architect
|
|
||||||
who selects Hyper-V is limited to Windows Server. Similarly, a
|
|
||||||
cloud architect who selects XenServer is limited to the CentOS-based
|
|
||||||
dom0 operating system provided with XenServer.</para>
|
|
||||||
<para>The primary factors that play into OS-hypervisor selection
|
|
||||||
include:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>User requirements</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The selection of OS-hypervisor
|
|
||||||
combination first and foremost needs to support the
|
|
||||||
user requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Support</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The selected OS-hypervisor combination
|
|
||||||
needs to be supported by OpenStack.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Interoperability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The OS-hypervisor needs to be
|
|
||||||
interoperable with other features and services in the
|
|
||||||
OpenStack design in order to meet the user
|
|
||||||
requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="hypervisor-tech-considerations">
|
|
||||||
<title>Hypervisor</title>
|
|
||||||
<para>OpenStack supports a wide variety of hypervisors, one or
|
|
||||||
more of which can be used in a single cloud. These hypervisors
|
|
||||||
include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>KVM (and QEMU)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>XCP/XenServer</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>vSphere (vCenter and ESXi)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Hyper-V</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>LXC</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Docker</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Bare-metal</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>A complete list of supported hypervisors and their
|
|
||||||
capabilities can be found at
|
|
||||||
<link xlink:href="https://wiki.openstack.org/wiki/HypervisorSupportMatrix">OpenStack Hypervisor Support Matrix</link>.
|
|
||||||
</para>
|
|
||||||
<para>We recommend general purpose clouds use hypervisors that
|
|
||||||
support the most general purpose use cases, such as KVM and
|
|
||||||
Xen. More specific hypervisors should be chosen to account
|
|
||||||
for specific functionality or a supported feature requirement.
|
|
||||||
In some cases, there may also be a mandated
|
|
||||||
requirement to run software on a certified hypervisor
|
|
||||||
including solutions from VMware, Microsoft, and Citrix.</para>
|
|
||||||
<para>The features offered through the OpenStack cloud platform
|
|
||||||
determine the best choice of a hypervisor. Each hypervisor
|
|
||||||
has its own hardware requirements, which may affect the decisions
|
|
||||||
around designing a general purpose cloud.</para>
|
|
||||||
<para>In a mixed hypervisor environment, specific aggregates of
|
|
||||||
compute resources, each with defined capabilities, enable
|
|
||||||
workloads to utilize software and hardware specific to their
|
|
||||||
particular requirements. This functionality can be exposed
|
|
||||||
explicitly to the end user, or accessed through defined
|
|
||||||
metadata within a particular flavor of an instance.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="openstack-components-tech-considerations">
|
|
||||||
<title>OpenStack components</title>
|
|
||||||
<para>A general purpose OpenStack cloud design should incorporate
|
|
||||||
the core OpenStack services to provide a wide range of
|
|
||||||
services to end-users. The OpenStack core services recommended
|
|
||||||
in a general purpose cloud are:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack <glossterm>Compute</glossterm>
|
|
||||||
(<glossterm>nova</glossterm>)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack <glossterm>Networking</glossterm>
|
|
||||||
(<glossterm>neutron</glossterm>)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack <glossterm>Image service</glossterm>
|
|
||||||
(<glossterm>glance</glossterm>)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack <glossterm>Identity</glossterm>
|
|
||||||
(<glossterm>keystone</glossterm>)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack <glossterm>dashboard</glossterm>
|
|
||||||
(<glossterm>horizon</glossterm>)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para><glossterm>Telemetry</glossterm>
|
|
||||||
(<glossterm>ceilometer</glossterm>)</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>A general purpose cloud may also include OpenStack
|
|
||||||
<glossterm>Object Storage</glossterm> (<glossterm>swift</glossterm>) and
|
|
||||||
OpenStack <glossterm>Block Storage</glossterm>
|
|
||||||
(<glossterm>cinder</glossterm>). These may be
|
|
||||||
selected to provide storage to applications and
|
|
||||||
instances.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="supplemental-software-tech-considerations">
|
|
||||||
<title>Supplemental software</title>
|
|
||||||
<para>A general purpose OpenStack deployment consists of more than
|
|
||||||
just OpenStack-specific components. A typical deployment
|
|
||||||
involves services that provide supporting functionality,
|
|
||||||
including databases and message queues, and may also involve
|
|
||||||
software to provide high availability of the OpenStack
|
|
||||||
environment. Design decisions around the underlying message
|
|
||||||
queue might affect the required number of controller services,
|
|
||||||
as well as the technology to provide highly resilient database
|
|
||||||
functionality, such as MariaDB with Galera. In such a
|
|
||||||
scenario, replication of services relies on quorum.</para>
|
|
||||||
<para>Where many general purpose deployments use hardware load
|
|
||||||
balancers to provide highly available API access and SSL
|
|
||||||
termination, software solutions, for example HAProxy, can also
|
|
||||||
be considered. It is vital to ensure that such software
|
|
||||||
implementations are also made highly available. High
|
|
||||||
availability can be achieved by using software such as
|
|
||||||
Keepalived or Pacemaker with Corosync. Pacemaker and Corosync
|
|
||||||
can provide active-active or active-passive highly available
|
|
||||||
configuration depending on the specific service in the
|
|
||||||
OpenStack environment. Using this software can affect the
|
|
||||||
design as it assumes at least a 2-node controller
|
|
||||||
infrastructure where one of those nodes may be running certain
|
|
||||||
services in standby mode.</para>
|
|
||||||
<para>Memcached is a distributed memory object caching system, and
|
|
||||||
Redis is a key-value store. Both are deployed on
|
|
||||||
general purpose clouds to assist in alleviating load to the
|
|
||||||
Identity service. The memcached service caches tokens, and due
|
|
||||||
to its distributed nature it can help alleviate some
|
|
||||||
bottlenecks to the underlying authentication system. Using
|
|
||||||
memcached or Redis does not affect the overall design of your
|
|
||||||
architecture as they tend to be deployed onto the
|
|
||||||
infrastructure nodes providing the OpenStack services.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="controller-infrastructure-tech-considerations">
|
|
||||||
<title>Controller infrastructure</title>
|
|
||||||
<para>The Controller infrastructure nodes provide management
|
|
||||||
services to the end-user as well as providing services
|
|
||||||
internally for the operation of the cloud. The Controllers
|
|
||||||
run message queuing services that carry system
|
|
||||||
messages between each service. Performance issues related to
|
|
||||||
the message bus would lead to delays in sending that message
|
|
||||||
to where it needs to go. The result of this condition would be
|
|
||||||
delays in operation functions such as spinning up and deleting
|
|
||||||
instances, provisioning new storage volumes and managing
|
|
||||||
network resources. Such delays could adversely affect an
|
|
||||||
application’s ability to react to certain conditions,
|
|
||||||
especially when using auto-scaling features. It is important
|
|
||||||
to properly design the hardware used to run the controller
|
|
||||||
infrastructure as outlined above in the Hardware Selection
|
|
||||||
section.</para>
|
|
||||||
<para>Performance of the controller services is not limited
|
|
||||||
to processing power, but restrictions may emerge in serving
|
|
||||||
concurrent users. Ensure that the APIs and Horizon services
|
|
||||||
are load tested to ensure that you are able to serve your
|
|
||||||
customers. Particular attention should be paid to the
|
|
||||||
OpenStack Identity service (keystone), which provides the
|
|
||||||
authentication and authorization for all services, both
|
|
||||||
internally to OpenStack itself and to end-users. This service
|
|
||||||
can lead to a degradation of overall performance if this is
|
|
||||||
not sized appropriately.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="network-performance-tech-considerations">
|
|
||||||
<title>Network performance</title>
|
|
||||||
<para>In a general purpose OpenStack cloud, the requirements of
|
|
||||||
the network help determine performance capabilities.
|
|
||||||
It is possible to design OpenStack
|
|
||||||
environments that run a mix of networking capabilities. By
|
|
||||||
utilizing the different interface speeds, the users of the
|
|
||||||
OpenStack environment can choose networks that are fit for
|
|
||||||
their purpose.</para>
|
|
||||||
<para>Network performance can be boosted considerably by
|
|
||||||
implementing hardware load balancers to provide front-end
|
|
||||||
service to the cloud APIs. The hardware load balancers also
|
|
||||||
perform SSL termination if that is a requirement of your
|
|
||||||
environment. When implementing SSL offloading, it is important
|
|
||||||
to understand the SSL offloading capabilities of the devices
|
|
||||||
selected.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="compute-host-tech-considerations">
|
|
||||||
<title>Compute host</title>
|
|
||||||
<para>The choice of hardware specifications used in compute nodes
|
|
||||||
including CPU, memory and disk type directly affects the
|
|
||||||
performance of the instances. Other factors which can directly
|
|
||||||
affect performance include tunable parameters within the
|
|
||||||
OpenStack services, for example the overcommit ratio applied
|
|
||||||
to resources. The defaults in OpenStack Compute set a 16:1
|
|
||||||
over-commit of the CPU and 1.5:1 over-commit of the memory.
|
|
||||||
Running at such high ratios leads to an increase in
|
|
||||||
"noisy-neighbor" activity. Care must be taken when sizing your
|
|
||||||
Compute environment to avoid this scenario. For running
|
|
||||||
general purpose OpenStack environments it is possible to keep
|
|
||||||
to the defaults, but make sure to monitor your environment as
|
|
||||||
usage increases.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="storage-performance-tech-considerations">
|
|
||||||
<title>Storage performance</title>
|
|
||||||
<para>When considering performance of OpenStack Block Storage,
|
|
||||||
hardware and architecture choice is important. Block Storage
|
|
||||||
can use enterprise back-end systems such as NetApp or EMC,
|
|
||||||
scale out storage such as GlusterFS and Ceph, or simply use
|
|
||||||
the capabilities of directly attached storage in the nodes
|
|
||||||
themselves. Block Storage may be deployed so that traffic
|
|
||||||
traverses the host network, which could affect, and be
|
|
||||||
adversely affected by, the front-side API traffic performance.
|
|
||||||
As such, consider using a dedicated data storage network with
|
|
||||||
dedicated interfaces on the Controller and Compute
|
|
||||||
hosts.</para>
|
|
||||||
<para>When considering performance of OpenStack Object Storage, a
|
|
||||||
number of design choices will affect performance. A user’s
|
|
||||||
access to the Object Storage is through the proxy services,
|
|
||||||
which sit behind hardware load balancers. By the
|
|
||||||
very nature of a highly resilient storage system, replication
|
|
||||||
of the data would affect performance of the overall system. In
|
|
||||||
this case, 10 GbE (or better) networking is recommended
|
|
||||||
throughout the storage network architecture.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="availability-tech-considerations">
|
|
||||||
<title>Availability</title>
|
|
||||||
<para>In OpenStack, the infrastructure is integral to providing
|
|
||||||
services and should always be available, especially when
|
|
||||||
operating with SLAs. Ensuring network availability is
|
|
||||||
accomplished by designing the network architecture so that no
|
|
||||||
single point of failure exists. A consideration of the number
|
|
||||||
of switches, routes and redundancies of power should be
|
|
||||||
factored into core infrastructure, as well as the associated
|
|
||||||
bonding of networks to provide diverse routes to your highly
|
|
||||||
available switch infrastructure.</para>
|
|
||||||
<para>The OpenStack services themselves should be deployed across
|
|
||||||
multiple servers that do not represent a single point of
|
|
||||||
failure. Ensuring API availability can be achieved by placing
|
|
||||||
these services behind highly available load balancers that
|
|
||||||
have multiple OpenStack servers as members.</para>
|
|
||||||
<para>OpenStack lends itself to deployment in a highly available
|
|
||||||
manner where it is expected that at least 2 servers be
|
|
||||||
utilized. These can run all the services involved from the
|
|
||||||
message queuing service, for example RabbitMQ or QPID, and an
|
|
||||||
appropriately deployed database service such as MySQL or
|
|
||||||
MariaDB. As services in the cloud are scaled out, back-end
|
|
||||||
services will need to scale too. Monitoring and reporting on
|
|
||||||
server utilization and response times, as well as load testing
|
|
||||||
your systems, will help determine scale out decisions.</para>
|
|
||||||
<para>Care must be taken when deciding network functionality.
|
|
||||||
Currently, OpenStack supports both the legacy networking (nova-network)
|
|
||||||
system and the newer, extensible OpenStack Networking (neutron). Both
|
|
||||||
have their pros and cons when it comes to providing highly
|
|
||||||
available access. Legacy networking, which provides networking
|
|
||||||
access maintained in the OpenStack Compute code, provides a
|
|
||||||
feature that removes a single point of failure when it comes
|
|
||||||
to routing, and this feature is currently missing in OpenStack
|
|
||||||
Networking. The effect of legacy networking’s multi-host
|
|
||||||
functionality restricts failure domains to the host running
|
|
||||||
that instance.</para>
|
|
||||||
<para>When using OpenStack Networking, the
|
|
||||||
OpenStack controller servers or separate Networking
|
|
||||||
hosts handle routing. For a deployment that requires features
|
|
||||||
available only in Networking, it is possible to
|
|
||||||
remove this restriction by using third party software that
|
|
||||||
helps maintain highly available L3 routes. Doing so allows for
|
|
||||||
common APIs to control network hardware, or to provide complex
|
|
||||||
multi-tier web applications in a secure manner. It is also
|
|
||||||
possible to completely remove routing from
|
|
||||||
Networking, and instead rely on hardware routing capabilities.
|
|
||||||
In this case, the switching infrastructure must support L3
|
|
||||||
routing.</para>
|
|
||||||
<para>OpenStack Networking and legacy networking
|
|
||||||
both have their advantages and
|
|
||||||
disadvantages. They are both valid and supported options that
|
|
||||||
fit different network deployment models described in the
|
|
||||||
<citetitle><link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/content/network_design.html#network_deployment_options"
|
|
||||||
>OpenStack Operations Guide</link></citetitle>.</para>
|
|
||||||
<para>Ensure your deployment has adequate back-up capabilities.</para>
|
|
||||||
<para>Application design must also be factored into the
|
|
||||||
capabilities of the underlying cloud infrastructure. If the
|
|
||||||
compute hosts do not provide a seamless live migration
|
|
||||||
capability, then it must be expected that when a compute host
|
|
||||||
fails, that instance and any data local to that instance will
|
|
||||||
be deleted. However, when providing an expectation to users
|
|
||||||
that instances have a high level of uptime guarantees, the
|
|
||||||
infrastructure must be deployed in a way that eliminates any
|
|
||||||
single point of failure when a compute host disappears. This
|
|
||||||
may include utilizing shared file systems on enterprise
|
|
||||||
storage or OpenStack Block Storage to provide a level of
|
|
||||||
guarantee to match service features.</para>
|
|
||||||
<para>For more information on high availability in OpenStack, see the <link
|
|
||||||
xlink:href="http://docs.openstack.org/ha-guide/"><citetitle>OpenStack
|
|
||||||
High Availability Guide</citetitle></link>.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="security-tech-considerations">
|
|
||||||
<title>Security</title>
|
|
||||||
<para>A security domain comprises users, applications, servers or
|
|
||||||
networks that share common trust requirements and expectations
|
|
||||||
within a system. Typically they have the same authentication
|
|
||||||
and authorization requirements and users.</para>
|
|
||||||
<para>These security domains are:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Public</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Guest</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Management</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Data</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>These security domains can be mapped to an OpenStack
|
|
||||||
deployment individually, or combined. In each case, the cloud operator
|
|
||||||
should be aware of the appropriate security concerns. Security
|
|
||||||
domains should be mapped out against your specific OpenStack
|
|
||||||
deployment topology. The domains and their trust requirements
|
|
||||||
depend upon whether the cloud instance is public, private, or
|
|
||||||
hybrid.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>The public security domain is an entirely untrusted area of
|
|
||||||
the cloud infrastructure. It can refer to the internet as a
|
|
||||||
whole or simply to networks over which you have no authority.
|
|
||||||
This domain should always be considered untrusted.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The guest security domain handles compute data generated by
|
|
||||||
instances on the cloud but not services that support the
|
|
||||||
operation of the cloud, such as API calls. Public cloud
|
|
||||||
providers and private cloud providers who do not have
|
|
||||||
stringent controls on instance use or who allow unrestricted
|
|
||||||
internet access to instances should consider this domain to be
|
|
||||||
untrusted. Private cloud providers may want to consider this
|
|
||||||
network as internal and therefore trusted only if they have
|
|
||||||
controls in place to assert that they trust instances and all
|
|
||||||
their tenants.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The management security domain is where services interact.
|
|
||||||
Sometimes referred to as the <emphasis>control plane</emphasis>, the networks
|
|
||||||
in this domain transport confidential data such as configuration
|
|
||||||
parameters, user names, and passwords. In most deployments this
|
|
||||||
domain is considered trusted.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The data security domain is concerned primarily with
|
|
||||||
information pertaining to the storage services within
|
|
||||||
OpenStack. Much of the data that crosses this network has high
|
|
||||||
integrity and confidentiality requirements and, depending on
|
|
||||||
the type of deployment, may also have strong availability
|
|
||||||
requirements. The trust level of this network is heavily
|
|
||||||
dependent on other deployment decisions.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>When deploying OpenStack in an enterprise as a private cloud
|
|
||||||
it is usually behind the firewall and within the trusted
|
|
||||||
network alongside existing systems. Users of the cloud are
|
|
||||||
employees that are bound by the security
|
|
||||||
requirements set forth by the company. This tends to push most
|
|
||||||
of the security domains towards a more trusted model. However,
|
|
||||||
when deploying OpenStack in a public facing role, no
|
|
||||||
assumptions can be made and the attack vectors significantly
|
|
||||||
increase.</para>
|
|
||||||
<para>Consideration must be taken when managing the users of the
|
|
||||||
system for both public and private clouds. The Identity
|
|
||||||
service allows for LDAP to be part of the authentication
|
|
||||||
process. Including such systems in an OpenStack deployment may
|
|
||||||
ease user management if integrating into existing
|
|
||||||
systems.</para>
|
|
||||||
<para>It is important to understand that user authentication
|
|
||||||
requests include sensitive information including user names,
|
|
||||||
passwords, and authentication tokens. For this reason, placing
|
|
||||||
the API services behind hardware that performs SSL termination
|
|
||||||
is strongly recommended.</para>
|
|
||||||
<para>
|
|
||||||
For more information on OpenStack security, see the <link
|
|
||||||
xlink:href="http://docs.openstack.org/security-guide/"><citetitle>OpenStack
|
|
||||||
Security Guide</citetitle></link>.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,155 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="user-requirements-general-purpose">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>User requirements</title>
|
|
||||||
<para>When building a general purpose cloud, you should follow the
|
|
||||||
<glossterm baseform="IaaS">Infrastructure-as-a-Service (IaaS)</glossterm>
|
|
||||||
model; a platform best suited for use cases with simple requirements.
|
|
||||||
General purpose cloud user requirements are not complex.
|
|
||||||
However, it is important to capture them even
|
|
||||||
if the project has minimum business and technical requirements, such as a
|
|
||||||
proof of concept (PoC), or a small lab platform.</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
The following user considerations are written from the perspective of
|
|
||||||
the cloud builder, not from the perspective of the end user.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Cost</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Financial factors are a primary concern for
|
|
||||||
any organization. Cost is an important criterion
|
|
||||||
as general purpose clouds are considered the baseline
|
|
||||||
from which all other cloud architecture environments
|
|
||||||
derive. General purpose clouds do not always provide
|
|
||||||
the most cost-effective environment for specialized
|
|
||||||
applications or situations. Unless razor-thin margins and costs have
|
|
||||||
been mandated as a critical factor, cost should not be
|
|
||||||
the sole consideration when choosing or designing a
|
|
||||||
general purpose architecture.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Time to market</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The ability to deliver services or products within
|
|
||||||
a flexible time frame is a common business factor
|
|
||||||
when building a general purpose cloud.
|
|
||||||
Delivering a product in six months instead
|
|
||||||
of two years is a driving force behind the
|
|
||||||
decision to build general purpose clouds. General
|
|
||||||
purpose clouds allow users to self-provision and gain
|
|
||||||
access to compute, network, and storage resources
|
|
||||||
on-demand thus decreasing time to market.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Revenue opportunity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Revenue opportunities for a
|
|
||||||
cloud will vary greatly based on the intended
|
|
||||||
use case of that particular cloud. Some general
|
|
||||||
purpose clouds are built for commercial customer
|
|
||||||
facing products, but there are alternatives
|
|
||||||
that might make the general purpose cloud the right
|
|
||||||
choice.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<section xml:id="technical-requirements">
|
|
||||||
<title>Technical requirements</title>
|
|
||||||
<para>Technical cloud architecture requirements should be weighed
|
|
||||||
against the business requirements.
|
|
||||||
</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Performance</term>
|
|
||||||
<listitem>
|
|
||||||
<para>As a baseline product, general purpose
|
|
||||||
clouds do not provide optimized performance for any
|
|
||||||
particular function. While a general purpose cloud
|
|
||||||
should provide enough performance to satisfy average
|
|
||||||
user considerations, performance is not a general
|
|
||||||
purpose cloud customer driver.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>No predefined usage model</term>
|
|
||||||
<listitem>
|
|
||||||
<para>The lack of a predefined
|
|
||||||
usage model enables the user to run a wide variety of
|
|
||||||
applications without having to know the application
|
|
||||||
requirements in advance. This provides a degree of
|
|
||||||
independence and flexibility that no other cloud
|
|
||||||
scenarios are able to provide.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>On-demand and self-service application</term>
|
|
||||||
<listitem>
|
|
||||||
<para>By
|
|
||||||
definition, a cloud provides end users with the
|
|
||||||
ability to self-provision computing power, storage,
|
|
||||||
networks, and software in a simple and flexible way.
|
|
||||||
The user must be able to scale their resources up to a
|
|
||||||
substantial level without disrupting the underlying
|
|
||||||
host operations. One of the benefits of using a
|
|
||||||
general purpose cloud architecture is the ability to
|
|
||||||
start with limited resources and increase them over
|
|
||||||
time as the user demand grows.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Public cloud</term>
|
|
||||||
<listitem>
|
|
||||||
<para>For a company interested in building a
|
|
||||||
commercial public cloud offering based on OpenStack,
|
|
||||||
the general purpose architecture model might be the
|
|
||||||
best choice. Designers are not always going to
|
|
||||||
know the purposes or workloads for which the end users
|
|
||||||
will use the cloud.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Internal consumption (private) cloud</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Organizations need to determine if it is logical to
|
|
||||||
create their own clouds internally. Using a private cloud,
|
|
||||||
organizations are able to maintain complete control over
|
|
||||||
architectural and cloud components.</para>
|
|
||||||
<note>
|
|
||||||
<para>Users will want to combine
|
|
||||||
using the internal cloud with access to an external
|
|
||||||
cloud. If that case is likely, it might be worth
|
|
||||||
exploring the possibility of taking a multi-cloud
|
|
||||||
approach with regard to at least some of the
|
|
||||||
architectural elements.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
<para>Designs that incorporate the
|
|
||||||
use of multiple clouds, such as a private cloud and a
|
|
||||||
public cloud offering, are described in the
|
|
||||||
"Multi-Cloud" scenario, see <xref linkend="multi_site"/>.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Security</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Security should be implemented according
|
|
||||||
to asset, threat, and vulnerability risk assessment
|
|
||||||
matrices. For cloud domains that require increased
|
|
||||||
computer security, network security, or information
|
|
||||||
security, a general purpose cloud is not considered an
|
|
||||||
appropriate choice.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,190 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-guide-architecture-hybrid">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Architecture</title>
|
|
||||||
<para>Map out the dependencies of the expected workloads
|
|
||||||
and the cloud infrastructures required to support them to architect a
|
|
||||||
solution for the broadest compatibility between cloud platforms,
|
|
||||||
minimizing the need to create workarounds and processes to fill
|
|
||||||
identified gaps.</para>
|
|
||||||
<para>For your chosen cloud management platform, note the relative
|
|
||||||
levels of support for both monitoring and orchestration.</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Multi-Cloud_Priv-AWS4.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
|
|
||||||
<section xml:id="image-portability">
|
|
||||||
<title>Image portability</title>
|
|
||||||
<para>The majority of cloud workloads currently run on instances
|
|
||||||
using hypervisor technologies. The challenge is that each of these
|
|
||||||
hypervisors uses an image format that may not be compatible with the
|
|
||||||
others. When possible, standardize on a single hypervisor and instance
|
|
||||||
image format. This may not be possible when using externally-managed
|
|
||||||
public clouds.</para>
|
|
||||||
<para>Conversion tools exist to address image format compatibility.
|
|
||||||
Examples include <link
|
|
||||||
xlink:href="http://libguestfs.org/virt-v2v">virt-p2v/virt-v2v</link>
|
|
||||||
and <link
|
|
||||||
xlink:href="http://libguestfs.org/virt-edit.1.html">
|
|
||||||
virt-edit</link>. These tools cannot serve beyond basic cloud instance
|
|
||||||
specifications.</para>
|
|
||||||
<para>Alternatively, build a thin operating system image as
|
|
||||||
the base for new instances. This facilitates rapid creation of cloud
|
|
||||||
instances using cloud orchestration or configuration management tools
|
|
||||||
for more specific templating. Remember that if you intend to use portable
|
|
||||||
images for disaster recovery, application diversity, or high
|
|
||||||
availability, your users could move the images and instances between
|
|
||||||
cloud platforms regularly.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="upper-layer-services">
|
|
||||||
<title>Upper-layer services</title>
|
|
||||||
<para>Many clouds offer complementary services beyond the
|
|
||||||
basic compute, network, and storage components. These
|
|
||||||
additional services often simplify the deployment
|
|
||||||
and management of applications on a cloud platform.</para>
|
|
||||||
<para>When moving workloads from the source to the destination
|
|
||||||
cloud platforms, consider that the destination cloud platform
|
|
||||||
may not have comparable services. Implement workloads in a
|
|
||||||
different way or by using a different technology.</para>
|
|
||||||
<para>For example, moving an application that uses a NoSQL database
|
|
||||||
service such as MongoDB could cause difficulties in maintaining
|
|
||||||
the application between the platforms.</para>
|
|
||||||
<para>There are a number of options that are appropriate for
|
|
||||||
the hybrid cloud use case:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Implementing a baseline of upper-layer services
|
|
||||||
across all of the cloud platforms. For
|
|
||||||
platforms that do not support a given service, create
|
|
||||||
a service on top of that platform and apply it to the
|
|
||||||
workloads as they are launched on that cloud.</para>
|
|
||||||
<para>For example, through the <glossterm>Database service</glossterm>
|
|
||||||
for OpenStack (<glossterm>trove</glossterm>),
|
|
||||||
OpenStack supports MySQL-as-a-Service but not NoSQL
|
|
||||||
databases in production. To move from or run
|
|
||||||
alongside AWS, a NoSQL workload must use an automation
|
|
||||||
tool, such as the Orchestration service (heat), to
|
|
||||||
recreate the NoSQL database on top of OpenStack.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Deploying a <glossterm>Platform-as-a-Service (PaaS)</glossterm>
|
|
||||||
technology that abstracts the
|
|
||||||
upper-layer services from the underlying cloud
|
|
||||||
platform. The unit of application deployment and
|
|
||||||
migration is the PaaS. It leverages the services of
|
|
||||||
the PaaS and only consumes the base infrastructure
|
|
||||||
services of the cloud platform.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Using automation tools to create the required upper-layer services
|
|
||||||
that are portable across all cloud platforms.</para>
|
|
||||||
<para>For example, instead of using database services that
|
|
||||||
are inherent in the cloud platforms, launch cloud
|
|
||||||
instances and deploy the databases on those
|
|
||||||
instances using scripts or configuration and
|
|
||||||
application deployment tools.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="network-services">
|
|
||||||
<title>Network services</title>
|
|
||||||
<para>Network services functionality is a critical component of
|
|
||||||
multiple cloud architectures. It is an important factor
|
|
||||||
to assess when choosing a CMP and cloud provider.
|
|
||||||
Considerations include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Functionality
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Security
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Scalability
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
High availability (HA)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Verify and test critical cloud endpoint features.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>After selecting the network functionality framework,
|
|
||||||
you must confirm the functionality is compatible. This
|
|
||||||
ensures testing and functionality persist
|
|
||||||
during and after upgrades.</para>
|
|
||||||
<note>
|
|
||||||
<para>Diverse cloud platforms may de-synchronize
|
|
||||||
over time if you do not maintain their mutual compatibility.
|
|
||||||
This is a particular issue with APIs.</para>
|
|
||||||
</note>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Scalability across multiple cloud providers determines
|
|
||||||
your choice of underlying network framework. It is important to
|
|
||||||
have the network API functions presented and to verify
|
|
||||||
that the desired functionality persists across all
|
|
||||||
chosen cloud endpoints.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>High availability implementations vary in
|
|
||||||
functionality and design. Examples of some common
|
|
||||||
methods are active-hot-standby, active-passive, and
|
|
||||||
active-active. Develop your high availability
|
|
||||||
implementation and a test framework to understand
|
|
||||||
the functionality and limitations of the environment.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>It is imperative to address security considerations.
|
|
||||||
For example, addressing how data is secured between client and
|
|
||||||
endpoint and any traffic that traverses the multiple clouds.
|
|
||||||
Business and regulatory requirements dictate what security
|
|
||||||
approach to take. For more information, see the
|
|
||||||
<link linkend="security-overview">Security
|
|
||||||
Requirements Chapter</link>.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="data">
|
|
||||||
<title>Data</title>
|
|
||||||
<para>Traditionally, replication has been the best method of protecting
|
|
||||||
object store implementations. A variety of replication methods exist
|
|
||||||
in storage architectures, for example synchronous and asynchronous
|
|
||||||
mirroring. Most object stores and back-end storage systems implement
|
|
||||||
methods for replication at the storage subsystem layer.
|
|
||||||
Object stores also tailor replication techniques
|
|
||||||
to fit a cloud's requirements.</para>
|
|
||||||
<para>Organizations must find the right balance between
|
|
||||||
data integrity and data availability. Replication strategy may
|
|
||||||
also influence disaster recovery methods.</para>
|
|
||||||
<para>Replication across different racks, data centers, and
|
|
||||||
geographical regions increases focus on
|
|
||||||
determining and ensuring data locality. The ability to
|
|
||||||
guarantee data is accessed from the nearest or fastest storage
|
|
||||||
can be necessary for applications to perform well.</para>
|
|
||||||
<note>
|
|
||||||
<para>When running embedded object store methods, ensure that you do
|
|
||||||
not instigate extra data replication as this can cause performance
|
|
||||||
issues.</para>
|
|
||||||
</note>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,86 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-guide-hybrid-operational-considerations">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Operational considerations</title>
|
|
||||||
<para>Hybrid cloud deployments present complex operational
|
|
||||||
challenges. Differences between provider clouds can cause
|
|
||||||
incompatibilities with workloads or Cloud Management
|
|
||||||
Platforms (CMP). Cloud providers may also offer different levels of
|
|
||||||
integration with competing cloud offerings.</para>
|
|
||||||
<para>Monitoring is critical to maintaining a hybrid cloud, and it is
|
|
||||||
important to determine if a CMP supports
|
|
||||||
monitoring of all the clouds involved, or if compatible APIs
|
|
||||||
are available to be queried for necessary information.</para>
|
|
||||||
|
|
||||||
<section xml:id="agility">
|
|
||||||
<title>Agility</title>
|
|
||||||
<para>Hybrid clouds provide application
|
|
||||||
availability across different cloud environments and
|
|
||||||
technologies. This availability enables the deployment to
|
|
||||||
survive disaster in any single cloud environment.
|
|
||||||
Each cloud should provide the means to create instances quickly
|
|
||||||
in response to capacity issues or failure elsewhere in the hybrid
|
|
||||||
cloud.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="application-readiness-hybrid">
|
|
||||||
<title>Application readiness</title>
|
|
||||||
<para>Enterprise workloads that depend on the
|
|
||||||
underlying infrastructure for availability are not designed to
|
|
||||||
run on OpenStack. If the application cannot
|
|
||||||
tolerate infrastructure failures, it is likely to require
|
|
||||||
significant operator intervention to recover. Applications for
|
|
||||||
hybrid clouds must be fault tolerant, with an SLA that is not tied
|
|
||||||
to the underlying infrastructure. Ideally, cloud applications should be
|
|
||||||
able to recover when entire racks and data centers experience an
|
|
||||||
outage.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="upgrades">
|
|
||||||
<title>Upgrades</title>
|
|
||||||
<para>If a deployment includes a public cloud, predicting
|
|
||||||
upgrades may not be possible. Carefully examine provider SLAs.</para>
|
|
||||||
<note>
|
|
||||||
<para>At massive scale, even when
|
|
||||||
dealing with a cloud that offers an SLA with a high percentage
|
|
||||||
of uptime, workloads must be able to recover quickly.</para>
|
|
||||||
</note>
|
|
||||||
<para>When upgrading private cloud deployments, minimize disruption by
|
|
||||||
making incremental changes and providing a facility to either roll back
|
|
||||||
or continue to roll forward when using a continuous delivery
|
|
||||||
model.</para>
|
|
||||||
<para>You may need to coordinate CMP upgrades with hybrid cloud upgrades if
|
|
||||||
there are API changes.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="network-operation-center-noc">
|
|
||||||
<title>Network Operation Center</title>
|
|
||||||
<para>Consider infrastructure control
|
|
||||||
when planning the Network Operation Center (NOC)
|
|
||||||
for a hybrid cloud environment. If a significant
|
|
||||||
portion of the cloud is on externally managed systems,
|
|
||||||
prepare for situations where it may not be possible to
|
|
||||||
make changes.
|
|
||||||
Additionally, providers may differ on how
|
|
||||||
infrastructure must be managed and exposed. This can lead to
|
|
||||||
delays in root cause analysis where each insists the blame
|
|
||||||
lies with the other provider.</para>
|
|
||||||
<para>Ensure that the network structure connects all clouds to form
|
|
||||||
an integrated system, keeping in mind the state of handoffs.
|
|
||||||
These handoffs must both be as reliable as possible and
|
|
||||||
include as little latency as possible to ensure the best
|
|
||||||
performance of the overall system.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="maintainability">
|
|
||||||
<title>Maintainability</title>
|
|
||||||
<para>Hybrid clouds rely on third party systems and processes. As a
|
|
||||||
result, it is not possible to guarantee
|
|
||||||
proper maintenance of the overall system. Instead, be prepared to
|
|
||||||
abandon workloads and recreate them in an improved state.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,173 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="prescriptive-examples-multi-cloud">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Prescriptive examples</title>
|
|
||||||
<para>Hybrid cloud environments are designed for
|
|
||||||
these use cases:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Bursting workloads from private to public OpenStack
|
|
||||||
clouds</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Bursting workloads from private to public
|
|
||||||
non-OpenStack clouds</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>High availability across clouds (for technical
|
|
||||||
diversity)</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>This chapter provides examples of environments
|
|
||||||
that address each of these use cases.</para>
|
|
||||||
<section xml:id="bursting-to-public-openstack-cloud">
|
|
||||||
<title>Bursting to a public OpenStack cloud</title>
|
|
||||||
<para>Company A's data center is running low on
|
|
||||||
capacity. It is not possible to expand the data center in the
|
|
||||||
foreseeable future. In order to accommodate
|
|
||||||
the continuously growing need for development resources in the
|
|
||||||
organization, Company A decides to use resources in the public
|
|
||||||
cloud.</para>
|
|
||||||
<para>Company A has an established data
|
|
||||||
center with a substantial amount of hardware. Migrating the
|
|
||||||
workloads to a public cloud is not feasible.</para>
|
|
||||||
<para>The company has an internal cloud management platform that
|
|
||||||
directs requests to the appropriate cloud, depending on
|
|
||||||
the local capacity. This is a custom in-house application written for
|
|
||||||
this specific purpose.</para>
|
|
||||||
<para>This solution is depicted in the figure below:</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Multi-Cloud_Priv-Pub3.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
<para>This example shows two clouds with a Cloud Management
|
|
||||||
Platform (CMP) connecting them. This guide does not
|
|
||||||
discuss a specific CMP, but describes how the Orchestration and
|
|
||||||
Telemetry services handle, manage, and control workloads.</para>
|
|
||||||
<para>The private OpenStack cloud has at least one
|
|
||||||
controller and at least one compute node. It includes
|
|
||||||
metering using the Telemetry service. The Telemetry service
|
|
||||||
captures the load increase and the CMP processes the information.
|
|
||||||
If there is available capacity, the CMP uses the
|
|
||||||
OpenStack API to call the Orchestration service. This creates
|
|
||||||
instances on the private cloud in response to user requests.
|
|
||||||
When capacity is not available on the private cloud,
|
|
||||||
the CMP issues a request to the Orchestration service API of
|
|
||||||
the public cloud. This creates the instance on the public
|
|
||||||
cloud.</para>
|
|
||||||
<para>In this example, Company A does not direct the deployments to an
|
|
||||||
external public cloud due to concerns regarding resource control,
|
|
||||||
security, and increased operational expense.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="bursting-to-public-nonopenstack-cloud">
|
|
||||||
<title>Bursting to a public non-OpenStack cloud</title>
|
|
||||||
<para>The second example examines bursting workloads from the
|
|
||||||
private cloud into a non-OpenStack public cloud using Amazon
|
|
||||||
Web Services (AWS) to take advantage of additional capacity
|
|
||||||
and to scale applications.</para>
|
|
||||||
<para>The following diagram demonstrates an OpenStack-to-AWS hybrid
|
|
||||||
cloud:</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Multi-Cloud_Priv-AWS4.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
<para>Company B states that its developers are already using AWS and
|
|
||||||
do not want to change to a different provider.</para>
|
|
||||||
<para>If the CMP is capable of connecting to an external
|
|
||||||
cloud provider with an appropriate API, the workflow process
|
|
||||||
remains the same as the previous scenario. The actions the
|
|
||||||
CMP takes, such as monitoring loads and creating new instances,
|
|
||||||
stay the same. However, the CMP performs actions in the
|
|
||||||
public cloud using applicable API calls.</para>
|
|
||||||
<para>If the public cloud is AWS, the CMP would use the
|
|
||||||
EC2 API to create a new instance and assign an Elastic IP.
|
|
||||||
It can then add that IP to HAProxy in the private cloud.
|
|
||||||
The CMP can also reference AWS-specific
|
|
||||||
tools such as CloudWatch and CloudFormation.</para>
|
|
||||||
<para>Several open source tool kits for building CMPs are
|
|
||||||
available and can handle this kind of translation. Examples include
|
|
||||||
ManageIQ, jClouds, and JumpGate.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="high-availability-disaster-recovery">
|
|
||||||
<title>High availability and disaster recovery</title>
|
|
||||||
<para>Company C requires their local data center
|
|
||||||
to be able to recover from failure. Some of the
|
|
||||||
workloads currently in use are running on their private
|
|
||||||
OpenStack cloud. Protecting the data involves Block Storage,
|
|
||||||
Object Storage, and a database. The architecture
|
|
||||||
supports the failure of large components of the system while
|
|
||||||
ensuring that the system continues to deliver services.
|
|
||||||
While the services remain available to users, the failed
|
|
||||||
components are restored in the background based on standard
|
|
||||||
best practice data replication policies. To achieve these objectives,
|
|
||||||
Company C replicates data to a second cloud in a geographically distant
|
|
||||||
location. The following diagram describes this system:</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Multi-Cloud_failover2.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
<para>This example includes two private OpenStack clouds connected
|
|
||||||
with a CMP. The source cloud,
|
|
||||||
OpenStack Cloud 1, includes a controller and at least one
|
|
||||||
instance running MySQL. It also includes at least one Block
|
|
||||||
Storage volume and one Object Storage volume. This means that data
|
|
||||||
is available to the users at all times. The details of the
|
|
||||||
method for protecting each of these sources of data
|
|
||||||
differ.</para>
|
|
||||||
<para>Object Storage relies on the replication capabilities of
|
|
||||||
the Object Storage provider. Company C enables OpenStack Object Storage
|
|
||||||
so that it creates geographically separated replicas
|
|
||||||
that take advantage of this feature. The company configures storage
|
|
||||||
so that at least one replica exists in each cloud. In order to make
|
|
||||||
this work, the company configures a single array spanning both clouds
|
|
||||||
with OpenStack Identity. Using Federated Identity, the array talks
|
|
||||||
to both clouds, communicating with OpenStack Object Storage
|
|
||||||
through the Swift proxy.</para>
|
|
||||||
<para>For Block Storage, the replication is a little more
|
|
||||||
difficult, and involves tools outside of OpenStack itself. The
|
|
||||||
OpenStack Block Storage volume is not set as the drive itself
|
|
||||||
but as a logical object that points to a physical back end. Disaster
|
|
||||||
recovery is configured for Block Storage for
|
|
||||||
synchronous backup for the highest level of data protection,
|
|
||||||
but asynchronous backup could have been set as an alternative
|
|
||||||
that is not as latency sensitive. For asynchronous backup, the
|
|
||||||
Block Storage API makes it possible to export the data and also the
|
|
||||||
metadata of a particular volume, so that it can be moved and
|
|
||||||
replicated elsewhere. More information can be found here:
|
|
||||||
<link
|
|
||||||
xlink:href="https://blueprints.launchpad.net/cinder/+spec/cinder-backup-volume-metadata-support">
|
|
||||||
https://blueprints.launchpad.net/cinder/+spec/cinder-backup-volume-metadata-support</link>.
|
|
||||||
</para>
|
|
||||||
<para>The synchronous backups create an identical volume in both
|
|
||||||
clouds and choose the appropriate flavor so that each cloud
|
|
||||||
has an identical back end. This is done by creating volumes
|
|
||||||
through the CMP. After this is configured, a solution
|
|
||||||
involving DRBD synchronizes the physical drives.</para>
|
|
||||||
<para>The database component is backed up using synchronous
|
|
||||||
backups. MySQL does not support geographically diverse
|
|
||||||
replication, so disaster recovery is provided by replicating
|
|
||||||
the file itself. As it is not possible to use Object Storage
|
|
||||||
as the back end of a database like MySQL, Swift replication
|
|
||||||
is not an option. Company C decides not to store the data on
|
|
||||||
another geo-tiered storage system, such as Ceph, as Block
|
|
||||||
Storage. This would have given another layer of protection.
|
|
||||||
Another option would have been to store the database on an
|
|
||||||
OpenStack Block Storage volume and back it up like any
|
|
||||||
other Block Storage.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,196 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="technical-considerations-hybrid">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Technical considerations</title>
|
|
||||||
<para>A hybrid cloud environment requires inspection and
|
|
||||||
understanding of technical issues in external data centers that may
|
|
||||||
not be in your control. Ideally, select an architecture
|
|
||||||
and CMP that are adaptable to changing environments.</para>
|
|
||||||
<para>Using diverse cloud platforms increases the risk of compatibility
|
|
||||||
issues, but clouds using the same version and distribution
|
|
||||||
of OpenStack are less likely to experience problems.</para>
|
|
||||||
<para>Clouds that exclusively use the same versions of OpenStack should
|
|
||||||
have no issues, regardless of distribution. More recent distributions
|
|
||||||
are less likely to encounter incompatibility between versions. An
|
|
||||||
OpenStack community initiative defines core functions that need to
|
|
||||||
remain backward compatible between supported versions. For example, the
|
|
||||||
DefCore initiative defines basic functions that every distribution must
|
|
||||||
support in order to use the name <productname>OpenStack</productname>.
|
|
||||||
</para>
|
|
||||||
<para>Vendors can add proprietary customization to their distributions. If
|
|
||||||
an application or architecture makes use of these features, it can be
|
|
||||||
difficult to migrate to or use other types of environments.</para>
|
|
||||||
<para>If an environment includes non-OpenStack clouds, it may experience
|
|
||||||
compatibility problems. CMP tools must account for the differences in
|
|
||||||
the handling of operations and the implementation of services.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<title>Possible cloud incompatibilities</title>
|
|
||||||
<listitem>
|
|
||||||
<para>Instance deployment</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Network management</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Application management</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Services implementation</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<section xml:id="capacity-planning-hybrid">
|
|
||||||
<title>Capacity planning</title>
|
|
||||||
<para>One of the primary reasons many organizations use a
|
|
||||||
hybrid cloud is to increase capacity without making large capital
|
|
||||||
investments.</para>
|
|
||||||
<para>Capacity and the placement of workloads are key design considerations
|
|
||||||
for hybrid clouds. The long-term capacity plan for these
|
|
||||||
designs must incorporate growth over time to prevent permanent
|
|
||||||
consumption of more expensive external clouds. To avoid this scenario,
|
|
||||||
account for future applications' capacity requirements and plan growth
|
|
||||||
appropriately.</para>
|
|
||||||
<para>It is difficult to predict the amount of load a particular
|
|
||||||
application might incur if the number of users fluctuates, or the
|
|
||||||
application experiences an unexpected increase in use. It is
|
|
||||||
possible to define application requirements in terms of vCPU, RAM,
|
|
||||||
bandwidth, or other resources and plan appropriately. However, other
|
|
||||||
clouds might not use the same meter or even the same oversubscription
|
|
||||||
rates.</para>
|
|
||||||
<para>Oversubscription is a method to emulate more capacity than
|
|
||||||
may physically be present. For example, a physical
|
|
||||||
hypervisor node with 32 GB RAM may host 24
|
|
||||||
instances, each provisioned with 2 GB RAM. As long
|
|
||||||
as all 24 instances do not concurrently use 2 full
|
|
||||||
gigabytes, this arrangement works well. However, some
|
|
||||||
hosts take oversubscription to extremes and, as a result,
|
|
||||||
performance can be inconsistent. If at all
|
|
||||||
possible, determine what the oversubscription rates of each
|
|
||||||
host are and plan capacity accordingly.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="utilization-hybrid">
|
|
||||||
<title>Utilization</title>
|
|
||||||
<para>A CMP must be aware of what workloads are running, where they are
|
|
||||||
running, and their preferred utilizations. For example, in
|
|
||||||
most cases it is desirable to run as many workloads internally
|
|
||||||
as possible, utilizing other resources only when necessary. On
|
|
||||||
the other hand, situations exist in which the opposite is
|
|
||||||
true, such as when an internal cloud is only for development and
|
|
||||||
stressing it is undesirable. A cost model of various scenarios and
|
|
||||||
consideration of internal priorities helps with this decision. To
|
|
||||||
improve efficiency, automate these decisions when possible.</para>
|
|
||||||
<para>The Telemetry service (ceilometer) provides information on the usage
|
|
||||||
of various OpenStack components. Note the following:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
If Telemetry must retain a large amount of data, for
|
|
||||||
example when monitoring a large or active cloud, we recommend
|
|
||||||
using a NoSQL back end such as MongoDB.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
You must monitor connections to non-OpenStack clouds
|
|
||||||
and report this information to the CMP.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="performance-hybrid">
|
|
||||||
<title>Performance</title>
|
|
||||||
<para>Performance is critical to hybrid cloud deployments, and they are
|
|
||||||
affected by many of the same issues as multi-site deployments,
|
|
||||||
such as network latency between sites. Also consider the time required
|
|
||||||
to run a workload in different clouds and methods for reducing this
|
|
||||||
time. This may require moving data closer to applications
|
|
||||||
or applications closer to the data they process, and
|
|
||||||
grouping functionality so that connections that
|
|
||||||
require low latency take place over a single cloud rather than
|
|
||||||
spanning clouds. This may also require a CMP that can determine which
|
|
||||||
cloud can most efficiently run which types of workloads.</para>
|
|
||||||
<para>As with utilization, native OpenStack tools help improve performance.
|
|
||||||
For example, you can use Telemetry to measure performance and the
|
|
||||||
Orchestration service (heat) to react to changes in demand.</para>
|
|
||||||
<note>
|
|
||||||
<para>Orchestration requires special client configurations to integrate
|
|
||||||
with Amazon Web Services. For other types of clouds, use CMP
|
|
||||||
features.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="components">
|
|
||||||
<title>Components</title>
|
|
||||||
<para>Using more than one cloud in any design requires consideration of
|
|
||||||
four OpenStack tools:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>OpenStack Compute (nova)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Regardless of deployment location, hypervisor choice has a
|
|
||||||
direct effect on how difficult it is to integrate with
|
|
||||||
additional clouds.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Networking (neutron)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Whether using OpenStack Networking (neutron) or legacy
|
|
||||||
networking (nova-network), it is necessary to understand
|
|
||||||
network integration capabilities in order to
|
|
||||||
connect between clouds.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Telemetry (ceilometer)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Use of Telemetry depends, in large part, on what other
|
|
||||||
parts of the cloud you are using.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Orchestration (heat)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Orchestration can be a valuable tool in orchestrating tasks a
|
|
||||||
CMP decides are necessary in an OpenStack-based cloud.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="special-considerations-hybrid">
|
|
||||||
<title>Special considerations</title>
|
|
||||||
<para>Hybrid cloud deployments require consideration of two issues that
|
|
||||||
are not common in other situations:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Image portability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>As of the Kilo release, there is no common image format that is
|
|
||||||
usable by all clouds. Conversion or recreation of images is necessary
|
|
||||||
if migrating between clouds. To simplify deployment, use the smallest
|
|
||||||
and simplest images feasible, install only what is necessary, and
|
|
||||||
use a deployment manager such as Chef or Puppet. Do not use golden
|
|
||||||
images to speed up the process unless you repeatedly deploy the same
|
|
||||||
images on the same cloud.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>API differences</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Avoid using a hybrid cloud deployment with more than just
|
|
||||||
OpenStack (or with different versions of OpenStack) as API changes
|
|
||||||
can cause compatibility issues.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,258 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="user-requirements-hybrid">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>User requirements</title>
|
|
||||||
<para>Hybrid cloud architectures are complex, especially those
|
|
||||||
that use heterogeneous cloud platforms. Ensure that design choices
|
|
||||||
match requirements so that the benefits outweigh the inherent additional
|
|
||||||
complexity and risks.</para>
|
|
||||||
<variablelist>
|
|
||||||
<title>Business considerations when designing a hybrid
|
|
||||||
cloud deployment</title>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Cost</term>
|
|
||||||
<listitem>
|
|
||||||
<para>A hybrid cloud architecture involves multiple
|
|
||||||
vendors and technical architectures. These
|
|
||||||
architectures may be more expensive to deploy and
|
|
||||||
maintain. Operational costs can be higher because of
|
|
||||||
the need for more sophisticated orchestration and
|
|
||||||
brokerage tools than in other architectures. In
|
|
||||||
contrast, overall operational costs might be lower by
|
|
||||||
virtue of using a cloud brokerage tool to deploy the
|
|
||||||
workloads to the most cost effective platform.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Revenue opportunity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Revenue opportunities vary based on the intent and use case
|
|
||||||
of the cloud. As a commercial, customer-facing product, you
|
|
||||||
must consider whether building over multiple platforms makes
|
|
||||||
the design more attractive to customers.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Time-to-market</term>
|
|
||||||
<listitem>
|
|
||||||
<para>One common reason to use cloud platforms is to improve the
|
|
||||||
time-to-market of a new product or application. For example,
|
|
||||||
using multiple cloud platforms is viable because there is an
|
|
||||||
existing investment in several applications. It is faster to
|
|
||||||
tie the investments together rather than migrate the
|
|
||||||
components and refactor them to a single platform.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Business or technical diversity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Organizations leveraging cloud-based services can
|
|
||||||
embrace business diversity and utilize a hybrid cloud
|
|
||||||
design to spread their workloads across multiple cloud
|
|
||||||
providers. This ensures that no single cloud provider is
|
|
||||||
the sole host for an application.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Application momentum</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Businesses with existing applications may find that it is
|
|
||||||
more cost effective to integrate applications on multiple
|
|
||||||
cloud platforms than to migrate them to a single platform.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
|
|
||||||
<section xml:id="workload-considerations">
|
|
||||||
<title>Workload considerations</title>
|
|
||||||
<para>A workload can be a single application or a suite of applications
|
|
||||||
that work together. It can also be a duplicate set of applications that
|
|
||||||
need to run on multiple cloud environments. In a hybrid cloud
|
|
||||||
deployment, the same workload often needs to function
|
|
||||||
equally well on radically different public and private cloud
|
|
||||||
environments. The architecture needs to address these
|
|
||||||
potential conflicts, complexity, and platform
|
|
||||||
incompatibilities.</para>
|
|
||||||
<variablelist>
|
|
||||||
<title>Use cases for a hybrid cloud architecture</title>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Dynamic resource expansion or bursting</term>
|
|
||||||
<listitem>
|
|
||||||
<para>An application that requires additional resources may suit
|
|
||||||
a multiple cloud architecture.
|
|
||||||
For example, a retailer needs additional resources
|
|
||||||
during the holiday season, but does not want to add private
|
|
||||||
cloud resources to meet the peak demand. The user can
|
|
||||||
accommodate the increased load by bursting to
|
|
||||||
a public cloud for these peak load
|
|
||||||
periods. These bursts could be for long or short
|
|
||||||
cycles ranging from hourly to yearly.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Disaster recovery and business continuity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Cheaper storage makes the public
|
|
||||||
cloud suitable for maintaining backup applications.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Federated hypervisor and instance management</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Adding self-service, charge back, and transparent delivery of
|
|
||||||
the resources from a federated pool can be cost
|
|
||||||
effective. In a hybrid cloud environment, this is a
|
|
||||||
particularly important consideration. Look for a cloud
|
|
||||||
that provides cross-platform hypervisor support and
|
|
||||||
robust instance management tools.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Application portfolio integration</term>
|
|
||||||
<listitem>
|
|
||||||
<para>An enterprise cloud delivers efficient application portfolio
|
|
||||||
management and deployments by leveraging
|
|
||||||
self-service features and rules according to use. Integrating
|
|
||||||
existing cloud environments is a common driver when building
|
|
||||||
hybrid cloud architectures.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Migration scenarios</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Hybrid cloud architecture enables the migration of
|
|
||||||
applications between different clouds.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>High availability</term>
|
|
||||||
<listitem>
|
|
||||||
<para>A combination of locations and platforms enables a
|
|
||||||
level of availability that is not
|
|
||||||
possible with a single platform. This approach increases
|
|
||||||
design complexity.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<para>As running a workload on multiple cloud platforms increases design
|
|
||||||
complexity, we recommend first exploring options such as transferring
|
|
||||||
workloads across clouds at the application, instance, cloud platform,
|
|
||||||
hypervisor, and network levels.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="tools-considerations-hybrid">
|
|
||||||
<title>Tools considerations</title>
|
|
||||||
<para>Hybrid cloud designs must incorporate tools to facilitate working
|
|
||||||
across multiple clouds.</para>
|
|
||||||
<variablelist>
|
|
||||||
<title>Tool functions</title>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Broker between clouds</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Brokering software evaluates relative costs between different
|
|
||||||
cloud platforms. Cloud Management Platforms (CMP)
|
|
||||||
allow the designer to determine the right location for the
|
|
||||||
workload based on predetermined criteria.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Facilitate orchestration across the clouds</term>
|
|
||||||
<listitem>
|
|
||||||
<para>CMPs simplify the migration of application workloads between
|
|
||||||
public, private, and hybrid cloud platforms. We recommend
|
|
||||||
using cloud orchestration tools for managing a diverse
|
|
||||||
portfolio of systems and applications across multiple cloud
|
|
||||||
platforms.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="network-considerations-hybrid">
|
|
||||||
<title>Network considerations</title>
|
|
||||||
<para>It is important to consider the functionality, security, scalability,
|
|
||||||
availability, and testability of the network when choosing a CMP and cloud
|
|
||||||
provider.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Decide on a network framework and
|
|
||||||
design minimum functionality tests. This ensures
|
|
||||||
testing and functionality persist during and after
|
|
||||||
upgrades.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Scalability across multiple cloud providers may
|
|
||||||
dictate which underlying network framework you
|
|
||||||
choose in different cloud providers. It is important
|
|
||||||
to present the network API functions and to
|
|
||||||
verify that functionality persists across all cloud
|
|
||||||
endpoints chosen.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>High availability implementations vary in
|
|
||||||
functionality and design. Examples of some common
|
|
||||||
methods are active-hot-standby, active-passive, and
|
|
||||||
active-active. Development of high availability and test
|
|
||||||
frameworks is necessary to ensure understanding of
|
|
||||||
functionality and limitations.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Consider the security of data between the client and the
|
|
||||||
endpoint, and of traffic that traverses the multiple
|
|
||||||
clouds.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="risk-mitigation-management-hybrid">
|
|
||||||
<title>Risk mitigation and management considerations</title>
|
|
||||||
<para>Hybrid cloud architectures introduce additional risk because
|
|
||||||
they are more complex than a single cloud design and may involve
|
|
||||||
incompatible components or tools. However, they also reduce
|
|
||||||
risk by spreading workloads over multiple providers.</para>
|
|
||||||
<variablelist>
|
|
||||||
<title>Hybrid cloud risks</title>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Provider availability or implementation details</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Business changes can affect provider availability. Likewise,
|
|
||||||
changes in a provider's service can disrupt a hybrid cloud
|
|
||||||
environment or increase costs.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Differing SLAs</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Hybrid cloud designs must accommodate differences in SLAs
|
|
||||||
between providers, and consider their enforceability.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Security levels</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Securing multiple cloud
|
|
||||||
environments is more complex than securing single
|
|
||||||
cloud environments. We recommend addressing concerns at
|
|
||||||
the application, network, and cloud platform levels.
|
|
||||||
Be aware that each cloud platform approaches security
|
|
||||||
differently, and a hybrid cloud design must address and
|
|
||||||
compensate for these differences.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Provider API changes</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Consumers of external clouds rarely have control over
|
|
||||||
provider changes to APIs, and changes can break compatibility.
|
|
||||||
Using only the most common and basic APIs can minimize
|
|
||||||
potential conflicts.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,106 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-guide-how-this-book-is-organized">
|
|
||||||
<title>How this book is organized</title>
|
|
||||||
<para>This book examines some of the most common uses for OpenStack
|
|
||||||
clouds, and explains the considerations for each use case.
|
|
||||||
Cloud architects may use this book as a comprehensive guide by
|
|
||||||
reading all of the use cases, but it is also possible to review
|
|
||||||
only the chapters which pertain to a specific use case.
|
|
||||||
The use cases covered in this guide include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="generalpurpose">General purpose</link>: Uses common components that address
|
|
||||||
80% of common use cases.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="compute_focus">Compute focused</link>: For compute intensive workloads
|
|
||||||
such as high performance computing (HPC).
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="storage_focus">Storage focused</link>: For storage intensive workloads such as
|
|
||||||
data analytics with parallel file systems.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="network_focus">Network focused</link>: For high performance and reliable
|
|
||||||
networking, such as a <glossterm
|
|
||||||
>content delivery network (CDN)</glossterm>.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="multi_site">Multi-site</link>: For applications that require multiple site
|
|
||||||
deployments for geographical, reliability or data
|
|
||||||
locality reasons.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="hybrid">Hybrid cloud</link>: Uses multiple disparate clouds
|
|
||||||
connected either for failover, hybrid cloud bursting, or
|
|
||||||
availability.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="massively_scalable">Massively
|
|
||||||
scalable</link>: For
|
|
||||||
cloud service providers or other large
|
|
||||||
installations.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
<link linkend="specialized">Specialized cases</link>: Architectures that have not
|
|
||||||
previously been covered in the defined use cases.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
|
|
||||||
<!-- This section is currently commented out as it is irrelevant within the current
|
|
||||||
context. However, there are plans to use this list in the future. Please do not remove.
|
|
||||||
|
|
||||||
<para>Each chapter in the guide is then further broken down into
|
|
||||||
the following sections:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Introduction: Provides an overview of the
|
|
||||||
architectural use case.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>User requirements: Defines the set of user
|
|
||||||
considerations that typically come into play for that
|
|
||||||
use case.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Technical considerations: Covers the technical
|
|
||||||
issues that must be accounted for when dealing with this
|
|
||||||
use case.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Operational considerations: Covers the ongoing
|
|
||||||
operational tasks associated with this use case and
|
|
||||||
architecture.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Architecture: Covers the overall architecture
|
|
||||||
associated with the use case.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Prescriptive examples: Presents one or more
|
|
||||||
scenarios where this architecture could be
|
|
||||||
deployed.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
-->
|
|
||||||
</section>
|
|
@ -1,95 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-guide-why-and-who-we-wrote-this-book">
|
|
||||||
<title>Why and how we wrote this book</title>
|
|
||||||
<para>We wrote this book to guide you through designing an OpenStack cloud
|
|
||||||
architecture. This guide identifies design considerations
|
|
||||||
for common cloud use cases and provides examples.</para>
|
|
||||||
<para>The Architecture Design Guide was written in a book sprint format,
|
|
||||||
which is a facilitated, rapid development production method for books.
|
|
||||||
The Book Sprint was facilitated by Faith Bosworth and Adam
|
|
||||||
Hyde of Book Sprints. For more information, see the Book Sprints website
|
|
||||||
(www.booksprints.net).</para>
|
|
||||||
<para>This book was written in five days during July 2014 while
|
|
||||||
exhausting the M&amp;M, Mountain Dew and healthy options
|
|
||||||
supply, complete with juggling entertainment during lunches at
|
|
||||||
VMware's headquarters in Palo Alto.</para>
|
|
||||||
<para>We would like to thank VMware for their generous
|
|
||||||
hospitality, as well as our employers, Cisco, Cloudscaling,
|
|
||||||
Comcast, EMC, Mirantis, Rackspace, Red Hat, Verizon, and
|
|
||||||
VMware, for enabling us to contribute our time. We would
|
|
||||||
especially like to thank Anne Gentle and Kenneth Hui for all
|
|
||||||
of their shepherding and organization in making this
|
|
||||||
happen.</para>
|
|
||||||
<para>The author team includes:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Kenneth Hui (EMC)
|
|
||||||
<link xlink:href="http://twitter.com/hui_kenneth"
|
|
||||||
>@hui_kenneth</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Alexandra Settle (Rackspace)
|
|
||||||
<link xlink:href="http://twitter.com/dewsday"
|
|
||||||
>@dewsday</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Anthony Veiga (Comcast)
|
|
||||||
<link xlink:href="http://twitter.com/daaelar"
|
|
||||||
>@daaelar</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Beth Cohen (Verizon)
|
|
||||||
<link xlink:href="http://twitter.com/bfcohen"
|
|
||||||
>@bfcohen</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Kevin Jackson (Rackspace)
|
|
||||||
<link xlink:href="http://twitter.com/itarchitectkev"
|
|
||||||
>@itarchitectkev</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Maish Saidel-Keesing (Cisco)
|
|
||||||
<link xlink:href="http://twitter.com/maishsk"
|
|
||||||
>@maishsk</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Nick Chase (Mirantis)
|
|
||||||
<link xlink:href="http://twitter.com/NickChase"
|
|
||||||
>@NickChase</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Scott Lowe (VMware)
|
|
||||||
<link xlink:href="http://twitter.com/scott_lowe"
|
|
||||||
>@scott_lowe</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Sean Collins (Comcast)
|
|
||||||
<link xlink:href="http://twitter.com/sc68cal"
|
|
||||||
>@sc68cal</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Sean Winn (Cloudscaling)
|
|
||||||
<link xlink:href="http://twitter.com/seanmwinn"
|
|
||||||
>@seanmwinn</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Sebastian Gutierrez (Red Hat)
|
|
||||||
<link xlink:href="http://twitter.com/gutseb"
|
|
||||||
>@gutseb</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Stephen Gordon (Red Hat)
|
|
||||||
<link xlink:href="http://twitter.com/xsgordon"
|
|
||||||
>@xsgordon</link></para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Vinny Valdez (Red Hat)
|
|
||||||
<link xlink:href="http://twitter.com/VinnyValdez"
|
|
||||||
>@VinnyValdez</link></para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
@ -1,18 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-guide-intended-audience">
|
|
||||||
<title>Intended audience</title>
|
|
||||||
<para>This book has been written for architects and designers of
|
|
||||||
OpenStack clouds. For a guide on deploying and operating
|
|
||||||
OpenStack, please refer to the <citetitle>OpenStack Operations
|
|
||||||
Guide</citetitle> (<link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops">http://docs.openstack.org/openstack-ops</link>).
|
|
||||||
</para>
|
|
||||||
<para>Before reading this book, we recommend prior knowledge of cloud architecture
|
|
||||||
and principles, experience in enterprise system design, Linux
|
|
||||||
and virtualization experience, and a basic understanding of
|
|
||||||
networking principles and protocols.</para>
|
|
||||||
</section>
|
|
@ -1,204 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="methodology">
|
|
||||||
<title>Methodology</title>
|
|
||||||
<para>The best way to design your cloud architecture is through creating and
|
|
||||||
testing use cases. Planning for applications that support thousands of
|
|
||||||
sessions per second, variable workloads, and complex, changing data,
|
|
||||||
requires you to identify the key meters. Identifying these key meters,
|
|
||||||
such as number of concurrent transactions per second, and size of
|
|
||||||
database, makes it possible to build a method for testing your assumptions.</para>
|
|
||||||
<para>Use a functional user scenario to develop test cases, and to measure
|
|
||||||
overall project trajectory.</para>
|
|
||||||
<note>
|
|
||||||
<para>If you do not want to use an application to develop user
|
|
||||||
requirements automatically, you need to create requirements to build
|
|
||||||
test harnesses and develop usable meters.</para>
|
|
||||||
</note>
|
|
||||||
<para>Establishing these meters allows you to respond to changes quickly without
|
|
||||||
having to set exact requirements in advance.
|
|
||||||
This creates ways to configure the system, rather than redesigning
|
|
||||||
it every time there is a requirements change.</para>
|
|
||||||
<important>
|
|
||||||
<para>It is important to limit scope creep. Ensure you address tool limitations,
|
|
||||||
but do not recreate the entire suite of tools. Work
|
|
||||||
with technical product owners to establish critical features that are needed
|
|
||||||
for a successful cloud deployment.</para>
|
|
||||||
</important>
|
|
||||||
|
|
||||||
<section xml:id="application-cloud-readiness-methods">
|
|
||||||
<title>Application cloud readiness</title>
|
|
||||||
<para>The cloud does more than host virtual machines and their applications.
|
|
||||||
This <emphasis>lift and shift</emphasis>
|
|
||||||
approach works in certain situations, but there is a fundamental
|
|
||||||
difference between clouds and traditional bare-metal-based
|
|
||||||
environments, or even traditional virtualized environments.</para>
|
|
||||||
<para>In traditional environments, with traditional enterprise
|
|
||||||
applications, the applications and the servers that run them are
|
|
||||||
<emphasis>pets</emphasis>.
|
|
||||||
They are lovingly crafted and cared for, the servers have
|
|
||||||
names like Gandalf or Tardis, and if they get sick someone nurses
|
|
||||||
them back to health. All of this is designed so that the application
|
|
||||||
does not experience an outage.</para>
|
|
||||||
<para>In cloud environments, servers are more like
|
|
||||||
cattle. There are thousands of them, they get names like NY-1138-Q,
|
|
||||||
and if they get sick, they get put down and a sysadmin installs
|
|
||||||
another one. Traditional applications that are unprepared for this
|
|
||||||
kind of environment may suffer outages, loss of data, or
|
|
||||||
complete failure.</para>
|
|
||||||
<para>There are other reasons to design applications with the cloud in mind.
|
|
||||||
Some are defensive, such as the fact that because applications cannot be
|
|
||||||
certain of exactly where or on what hardware they will be launched,
|
|
||||||
they need to be flexible, or at least adaptable. Others are
|
|
||||||
proactive. For example, one of the advantages of using the cloud is
|
|
||||||
scalability. Applications need to be designed in such a way that
|
|
||||||
they can take advantage of these and other opportunities.</para>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="determining-whether-an-application-is-cloud-ready">
|
|
||||||
<title>Determining whether an application is cloud-ready</title>
|
|
||||||
<para>There are several factors to take into consideration when looking
|
|
||||||
at whether an application is a good fit for the cloud.</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Structure</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
A large, monolithic, single-tiered, legacy
|
|
||||||
application typically is not a good fit for the
|
|
||||||
cloud. Efficiencies are gained when load can be
|
|
||||||
spread over several instances, so that a failure
|
|
||||||
in one part of the system can be mitigated without
|
|
||||||
affecting other parts of the system, or so that
|
|
||||||
scaling can take place where the app needs
|
|
||||||
it.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Dependencies</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Applications that depend on specific
|
|
||||||
hardware, such as a particular chip set or an
|
|
||||||
external device such as a fingerprint
|
|
||||||
reader, might not be a good fit for the
|
|
||||||
cloud, unless those dependencies are specifically
|
|
||||||
addressed. Similarly, if an application depends on
|
|
||||||
an operating system or set of libraries that
|
|
||||||
cannot be used in the cloud, or cannot be
|
|
||||||
virtualized, that is a problem.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Connectivity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Self-contained applications, or those that depend
|
|
||||||
on resources that are not reachable by the cloud
|
|
||||||
in question, will not run. In some situations,
|
|
||||||
you can work around these issues with custom network
|
|
||||||
setup, but how well this works depends on the
|
|
||||||
chosen cloud environment.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Durability and resilience</term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
Despite the existence of SLAs, things break:
|
|
||||||
servers go down, network connections are
|
|
||||||
disrupted, or too many tenants on a server make a
|
|
||||||
server unusable. An application must be sturdy
|
|
||||||
enough to contend with these issues.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
|
|
||||||
<section xml:id="designing-for-the-cloud">
|
|
||||||
<title>Designing for the cloud</title>
|
|
||||||
<para>Here are some guidelines to keep in mind when designing an
|
|
||||||
application for the cloud:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Be a pessimist: Assume everything fails and design
|
|
||||||
backwards.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Put your eggs in multiple baskets: Leverage multiple
|
|
||||||
providers, geographic regions and availability zones to
|
|
||||||
accommodate local availability issues. Design for
|
|
||||||
portability.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Think efficiency: Inefficient designs will not scale.
|
|
||||||
Efficient designs become cheaper as they scale. Kill off
|
|
||||||
unneeded components or capacity.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Be paranoid: Design for defense in depth and zero
|
|
||||||
tolerance by building in security at every level and between
|
|
||||||
every component. Trust no one.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>But not too paranoid: Not every application needs the
|
|
||||||
platinum solution. Architect for different SLAs, service
|
|
||||||
tiers, and security levels.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Manage the data: Data is usually the most inflexible and
|
|
||||||
complex area of a cloud and cloud integration architecture.
|
|
||||||
Do not shortchange the effort in analyzing and addressing
|
|
||||||
data needs.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Hands off: Leverage automation to increase consistency and
|
|
||||||
quality and reduce response times.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Divide and conquer: Pursue partitioning and
|
|
||||||
parallel layering wherever possible. Make components as small
|
|
||||||
and portable as possible. Use load balancing between layers.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Think elasticity: Increasing resources should result in a
|
|
||||||
proportional increase in performance and scalability.
|
|
||||||
Decreasing resources should have the opposite effect.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Be dynamic: Enable dynamic configuration changes such as
|
|
||||||
auto scaling, failure recovery and resource discovery to
|
|
||||||
adapt to changing environments, faults, and workload volumes.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Stay close: Reduce latency by moving highly interactive
|
|
||||||
components and data near each other.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Keep it loose: Loose coupling, service interfaces,
|
|
||||||
separation of concerns, abstraction, and well-defined APIs
|
|
||||||
deliver flexibility.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Be cost aware: Autoscaling, data transmission, virtual
|
|
||||||
software licenses, reserved instances, and similar costs can rapidly
|
|
||||||
increase monthly usage charges. Monitor usage closely.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,102 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="operational-considerations-massive-scale">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Operational considerations</title>
|
|
||||||
<para>In order to run efficiently at massive scale, automate
|
|
||||||
as many of the operational processes as
|
|
||||||
possible. Automation includes the configuration of
|
|
||||||
provisioning, monitoring and alerting systems. Part of the
|
|
||||||
automation process includes the capability to determine when
|
|
||||||
human intervention is required and who should act. The
|
|
||||||
objective is to decrease the ratio of operational staff to
|
|
||||||
running systems as much as possible in order to reduce maintenance
|
|
||||||
costs. In a massively scaled environment, it is very difficult
|
|
||||||
for staff to give each system individual care.</para>
|
|
||||||
<para>Configuration management tools such as Puppet and Chef enable
|
|
||||||
operations staff to categorize systems into groups based on
|
|
||||||
their roles and thus create configurations and system states
|
|
||||||
that the provisioning system enforces. Systems
|
|
||||||
that fall out of the defined state due to errors or failures
|
|
||||||
are quickly removed from the pool of active nodes and
|
|
||||||
replaced.</para>
|
|
||||||
<para>At large scale the resource cost of diagnosing failed individual
|
|
||||||
systems is far greater than the cost of
|
|
||||||
replacement. It is more economical to replace the failed
|
|
||||||
system with a new system, provisioning and configuring it
|
|
||||||
automatically and adding it to the pool of active nodes.
|
|
||||||
By automating tasks that are labor-intensive,
|
|
||||||
repetitive, and critical to operations, cloud operations
|
|
||||||
teams can work more
|
|
||||||
efficiently because fewer resources are required for these
|
|
||||||
common tasks. Administrators are then free to tackle
|
|
||||||
tasks that are not easy to automate and that have longer-term
|
|
||||||
impacts on the business, for example, capacity planning.</para>
|
|
||||||
<section xml:id="the-bleeding-edge">
|
|
||||||
<title>The bleeding edge</title>
|
|
||||||
<para>Running OpenStack at massive scale requires striking a
|
|
||||||
balance between stability and features. For example, it might
|
|
||||||
be tempting to run an older stable release branch of OpenStack
|
|
||||||
to make deployments easier. However, when running at massive
|
|
||||||
scale, known issues that may be of some concern or only have
|
|
||||||
minimal impact in smaller deployments could become pain points.
|
|
||||||
Recent releases may address well-known issues. The OpenStack
|
|
||||||
community can help resolve reported issues by applying
|
|
||||||
the collective expertise of the OpenStack developers.</para>
|
|
||||||
<para>The number of organizations running at
|
|
||||||
massive scales is a small proportion of the
|
|
||||||
OpenStack community; therefore, it is important to share
|
|
||||||
related issues with the community and be a vocal advocate for
|
|
||||||
resolving them. Some issues only manifest when operating at
|
|
||||||
large scale, and the number of organizations able to duplicate
|
|
||||||
and validate an issue is small, so it is important to
|
|
||||||
document and dedicate resources to their resolution.</para>
|
|
||||||
<para>In some cases, the resolution to the problem is ultimately
|
|
||||||
to deploy a more recent version of OpenStack. Alternatively,
|
|
||||||
when you must resolve an issue in a production
|
|
||||||
environment where rebuilding the entire environment is not an
|
|
||||||
option, it is sometimes possible to deploy updates to specific
|
|
||||||
underlying components in order to resolve issues or gain
|
|
||||||
significant performance improvements. Although this may appear
|
|
||||||
to expose the deployment to
|
|
||||||
increased risk and instability, in many cases it
|
|
||||||
could be an undiscovered issue.</para>
|
|
||||||
<para>We recommend building a development and operations
|
|
||||||
organization that is responsible for creating desired
|
|
||||||
features, diagnosing and resolving issues, and building the
|
|
||||||
infrastructure for large scale continuous integration tests
|
|
||||||
and continuous deployment. This helps catch bugs early and
|
|
||||||
makes deployments faster and easier. In addition to
|
|
||||||
development resources, we also recommend the recruitment
|
|
||||||
of experts in the fields of message queues, databases, distributed
|
|
||||||
systems, networking, cloud, and storage.</para></section>
|
|
||||||
<section xml:id="growth-and-capacity-planning">
|
|
||||||
<title>Growth and capacity planning</title>
|
|
||||||
<para>An important consideration in running at massive scale is
|
|
||||||
projecting growth and utilization trends in order to plan capital
|
|
||||||
expenditures for the short and long term. Gather utilization
|
|
||||||
meters for compute, network, and storage, along with historical
|
|
||||||
records of these meters. While securing major
|
|
||||||
anchor tenants can lead to rapid jumps in the utilization
|
|
||||||
rates of all resources, the steady adoption of the cloud
|
|
||||||
inside an organization or by consumers in a public
|
|
||||||
offering also creates a steady trend of increased
|
|
||||||
utilization.</para></section>
|
|
||||||
<section xml:id="skills-and-training">
|
|
||||||
<title>Skills and training</title>
|
|
||||||
<para>Projecting growth for storage, networking, and compute is
|
|
||||||
only one aspect of a growth plan for running OpenStack at
|
|
||||||
massive scale. Growing and nurturing development and
|
|
||||||
operational staff is an additional consideration. Sending team
|
|
||||||
members to OpenStack conferences and meetup events, and
|
|
||||||
encouraging active participation in the mailing lists and
|
|
||||||
committees is a very important way to maintain skills and
|
|
||||||
forge relationships in the community. For a list of OpenStack
|
|
||||||
training providers in the marketplace, see: <link
|
|
||||||
xlink:href="http://www.openstack.org/marketplace/training/">http://www.openstack.org/marketplace/training/</link>.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,131 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="technical-considerations-massive-scale">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Technical considerations</title>
|
|
||||||
<para>Repurposing an existing OpenStack environment to be
|
|
||||||
massively scalable is a formidable task. When building
|
|
||||||
a massively scalable environment from the ground up, ensure
|
|
||||||
you build the initial deployment with the same principles
|
|
||||||
and choices that apply as the environment grows. For example,
|
|
||||||
a good approach is to deploy the first site as a multi-site
|
|
||||||
environment. This enables you to use the same deployment
|
|
||||||
and segregation methods as the environment grows to separate
|
|
||||||
locations across dedicated links or wide area networks. In
|
|
||||||
a hyperscale cloud, scale trumps redundancy. Modify applications
|
|
||||||
with this in mind, relying on the scale and homogeneity of the
|
|
||||||
environment to provide reliability rather than redundant
|
|
||||||
infrastructure provided by non-commodity hardware
|
|
||||||
solutions.</para>
|
|
||||||
<section xml:id="infrastructure-segregation-massive-scale">
|
|
||||||
<title>Infrastructure segregation</title>
|
|
||||||
<para>OpenStack services support massive horizontal scale.
|
|
||||||
Be aware that this is not the case for the entire supporting
|
|
||||||
infrastructure. This is particularly a problem for the database
|
|
||||||
management systems and message queues that OpenStack services
|
|
||||||
use for data storage and remote procedure call communications.</para>
|
|
||||||
<para>Traditional clustering techniques typically
|
|
||||||
provide high availability and some additional scale for these
|
|
||||||
environments. In the quest for massive scale, however, you must
|
|
||||||
take additional steps to relieve the performance
|
|
||||||
pressure on these components in order to prevent them from negatively
|
|
||||||
impacting the overall performance of the environment. Ensure that
|
|
||||||
all the components are in balance so that if the massively
|
|
||||||
scalable environment fails, all the components are near maximum
|
|
||||||
capacity and a single component is not causing the failure.</para>
|
|
||||||
<para>Regions segregate completely independent
|
|
||||||
installations linked only by an Identity and Dashboard
|
|
||||||
(optional) installation. Services have separate
|
|
||||||
API endpoints for each region, and include separate database
|
|
||||||
and queue installations. This exposes some awareness of the
|
|
||||||
environment's fault domains to users and gives them the
|
|
||||||
ability to ensure some degree of application resiliency while
|
|
||||||
also imposing the requirement to specify which region to apply
|
|
||||||
their actions to.</para>
|
|
||||||
<para>Environments operating at massive scale typically need their
|
|
||||||
regions or sites subdivided further without exposing the
|
|
||||||
requirement to specify the failure domain to the user. This
|
|
||||||
provides the ability to further divide the installation into
|
|
||||||
failure domains while also providing a logical unit for
|
|
||||||
maintenance and the addition of new hardware. At hyperscale,
|
|
||||||
instead of adding single compute nodes, administrators can add
|
|
||||||
entire racks or even groups of racks at a time with each new
|
|
||||||
addition of nodes exposed via one of the segregation concepts
|
|
||||||
mentioned herein.</para>
|
|
||||||
<para><glossterm baseform="cell">Cells</glossterm> provide the ability
|
|
||||||
to subdivide the compute portion
|
|
||||||
of an OpenStack installation, including regions, while still
|
|
||||||
exposing a single endpoint. Each region has an API cell
|
|
||||||
along with a number of compute cells where the
|
|
||||||
workloads actually run. Each cell has its own database and
|
|
||||||
message queue setup (ideally clustered), providing the ability
|
|
||||||
to subdivide the load on these subsystems, improving overall
|
|
||||||
performance.</para>
|
|
||||||
<para>Each compute cell provides a complete compute installation,
|
|
||||||
complete with full database and queue installations,
|
|
||||||
scheduler, conductor, and multiple compute hosts. The cells
|
|
||||||
scheduler handles placement of user requests from the single
|
|
||||||
API endpoint to a specific cell from those available. The
|
|
||||||
normal filter scheduler then handles placement within the
|
|
||||||
cell.</para>
|
|
||||||
<para>Unfortunately, Compute is the only OpenStack service that
|
|
||||||
provides good support for cells. In addition, cells
|
|
||||||
do not adequately support some standard
|
|
||||||
OpenStack functionality such as security groups and host
|
|
||||||
aggregates. Due to their relative newness and specialized use,
|
|
||||||
cells receive relatively little testing in the OpenStack gate.
|
|
||||||
Despite these issues, cells play an important role in
|
|
||||||
well-known OpenStack installations operating at massive scale,
|
|
||||||
such as those at CERN and Rackspace.</para></section>
|
|
||||||
<section xml:id="host-aggregates">
|
|
||||||
<title>Host aggregates</title>
|
|
||||||
<para>Host aggregates enable partitioning of OpenStack Compute
|
|
||||||
deployments into logical groups for load balancing and
|
|
||||||
instance distribution. You can also use host aggregates to
|
|
||||||
further partition an availability zone. Consider a cloud which
|
|
||||||
might use host aggregates to partition an availability zone
|
|
||||||
into groups of hosts that either share common resources, such
|
|
||||||
as storage and network, or have a special property, such as
|
|
||||||
trusted computing hardware. You cannot target host aggregates
|
|
||||||
explicitly. Instead, select instance flavors that map to host
|
|
||||||
aggregate metadata. These flavors target host aggregates
|
|
||||||
implicitly.</para></section>
|
|
||||||
<section xml:id="availability-zones">
|
|
||||||
<title>Availability zones</title>
|
|
||||||
<para>Availability zones provide another mechanism for subdividing
|
|
||||||
an installation or region. They are, in effect, host
|
|
||||||
aggregates exposed for (optional) explicit targeting
|
|
||||||
by users.</para>
|
|
||||||
<para>Unlike cells, availability zones do not have their own database
|
|
||||||
server or queue broker but represent an arbitrary grouping of
|
|
||||||
compute nodes. Typically, nodes are grouped into availability
|
|
||||||
zones using a shared failure domain based on a physical
|
|
||||||
characteristic such as a shared power source or physical network
|
|
||||||
connections. Users can target exposed availability zones; however,
|
|
||||||
this is not a requirement. An alternative approach is to set a default
|
|
||||||
availability zone to schedule instances to something other than the default availability
|
|
||||||
zone of <literal>nova</literal>.</para></section>
|
|
||||||
<section xml:id="segregation-example">
|
|
||||||
<title>Segregation example</title>
|
|
||||||
<para>In this example the cloud is divided into two regions, one
|
|
||||||
for each site, with two availability zones in each based on
|
|
||||||
the power layout of the data centers. A number of host
|
|
||||||
aggregates enable targeting of
|
|
||||||
virtual machine instances using flavors that require special
|
|
||||||
capabilities shared by the target hosts such as SSDs, 10 GbE
|
|
||||||
networks, or GPU cards.</para>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Massively_Scalable_Cells_+_regions_+_azs.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject></section>
|
|
||||||
</section>
|
|
@ -1,135 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="user-requirements-massive-scale-overview">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>User requirements</title>
|
|
||||||
<para>Defining user requirements for a massively scalable OpenStack
|
|
||||||
design architecture dictates approaching the design from two
|
|
||||||
different, yet sometimes opposing, perspectives: the cloud
|
|
||||||
user, and the cloud operator. The expectations and perceptions
|
|
||||||
of the consumption and management of resources of a massively
|
|
||||||
scalable OpenStack cloud from these two perspectives are
|
|
||||||
distinctly different.</para>
|
|
||||||
<para>Massively scalable OpenStack clouds have the following user
|
|
||||||
requirements:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>The cloud user expects repeatable, dependable, and
|
|
||||||
deterministic processes for launching and deploying
|
|
||||||
cloud resources. You could deliver this through a
|
|
||||||
web-based interface or publicly available API
|
|
||||||
endpoints. All appropriate options for requesting
|
|
||||||
cloud resources must be available through some type
|
|
||||||
of user interface, a command-line interface (CLI), or
|
|
||||||
API endpoints.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Cloud users expect a fully self-service and
|
|
||||||
on-demand consumption model. When an OpenStack cloud
|
|
||||||
reaches the "massively scalable" size, expect
|
|
||||||
consumption "as a service" in each and
|
|
||||||
every way.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>For a user of a massively scalable OpenStack public
|
|
||||||
cloud, there are no expectations for control over
|
|
||||||
security, performance, or availability. Users expect
|
|
||||||
only SLAs related to uptime of API services, and
|
|
||||||
very basic SLAs for services offered. It is the user's
|
|
||||||
responsibility to address these issues on their own.
|
|
||||||
The exception to this expectation is the rare case of
|
|
||||||
a massively scalable cloud infrastructure built for
|
|
||||||
a private or government organization that has
|
|
||||||
specific requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>The cloud user's requirements and expectations that determine
|
|
||||||
the cloud design focus on the consumption model. The user
|
|
||||||
expects to consume cloud resources in an automated and
|
|
||||||
deterministic way, without any need for knowledge of the
|
|
||||||
capacity, scalability, or other attributes of the cloud's
|
|
||||||
underlying infrastructure.</para>
|
|
||||||
<section xml:id="operator-requirements-massive-scale">
|
|
||||||
<title>Operator requirements</title>
|
|
||||||
<para>While the cloud user can be completely unaware of the
|
|
||||||
underlying infrastructure of the cloud and its attributes, the
|
|
||||||
operator must build and support the infrastructure for operating
|
|
||||||
at scale. This presents a very demanding set of requirements
|
|
||||||
for building such a cloud from the operator's perspective:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Everything must be capable of automation. For example,
|
|
||||||
everything from compute hardware, storage hardware,
|
|
||||||
networking hardware, to the installation and
|
|
||||||
configuration of the supporting software. Manual
|
|
||||||
processes are impractical in a massively scalable
|
|
||||||
OpenStack design architecture.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The cloud operator requires that capital expenditure
|
|
||||||
(CapEx) is minimized at all layers of the stack.
|
|
||||||
Operators of massively scalable OpenStack clouds
|
|
||||||
require the use of dependable commodity hardware and
|
|
||||||
freely available open source software components to
|
|
||||||
reduce deployment costs and operational expenses.
|
|
||||||
Initiatives like OpenCompute (more information
|
|
||||||
available at <link
|
|
||||||
xlink:href="http://www.opencompute.org">http://www.opencompute.org</link>)
|
|
||||||
provide additional information and pointers. To cut
|
|
||||||
costs, many operators sacrifice redundancy. For
|
|
||||||
example, they omit redundant power supplies, network
|
|
||||||
connections, and rack switches.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Companies operating a massively scalable OpenStack
|
|
||||||
cloud also require that operational expenditures
|
|
||||||
(OpEx) be minimized as much as possible. We
|
|
||||||
recommend using cloud-optimized hardware when
|
|
||||||
managing operational overhead. Some of
|
|
||||||
the factors to consider include power,
|
|
||||||
cooling, and the physical design of the chassis. Through
|
|
||||||
customization, it is possible to optimize the hardware
|
|
||||||
and systems for this type of workload because of the
|
|
||||||
scale of these implementations.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Massively scalable OpenStack clouds require
|
|
||||||
extensive metering and monitoring functionality to
|
|
||||||
maximize the operational efficiency by keeping the
|
|
||||||
operator informed about the status and state of the
|
|
||||||
infrastructure. This includes full scale metering of
|
|
||||||
the hardware and software status. A corresponding
|
|
||||||
framework of logging and alerting is also required to
|
|
||||||
store and enable operations to act on the meters
|
|
||||||
provided by the metering and monitoring solutions.
|
|
||||||
The cloud operator also needs a solution that uses the
|
|
||||||
data provided by the metering and monitoring solution
|
|
||||||
to provide capacity planning and capacity trending
|
|
||||||
analysis.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Invariably, massively scalable OpenStack clouds extend
|
|
||||||
over several sites. Therefore, the user-operator
|
|
||||||
requirements for a multi-site OpenStack architecture
|
|
||||||
design are also applicable here. This includes various
|
|
||||||
legal requirements; other jurisdictional legal or
|
|
||||||
compliance requirements; image
|
|
||||||
consistency-availability; storage replication and
|
|
||||||
availability (both block and file/object storage); and
|
|
||||||
authentication, authorization, and auditing (AAA).
|
|
||||||
See <xref linkend="multi_site"/>
|
|
||||||
for more details on requirements and considerations
|
|
||||||
for multi-site OpenStack clouds.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The design architecture of a massively scalable OpenStack
|
|
||||||
cloud must address considerations around physical
|
|
||||||
facilities such as space, floor weight, rack height and
|
|
||||||
type, environmental considerations, power usage and power
|
|
||||||
usage efficiency (PUE), and physical security.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist></section>
|
|
||||||
</section>
|
|
@ -1,123 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="arch-design-architecture-multiple-site">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Architecture</title>
|
|
||||||
<para><xref linkend="multi-site_arch"/>
|
|
||||||
illustrates a high level multi-site OpenStack
|
|
||||||
architecture. Each site is an OpenStack cloud but it may be necessary
|
|
||||||
to architect the sites on different versions. For example, if the
|
|
||||||
second site is intended to be a replacement for the first site,
|
|
||||||
they would be different. Another common design would be a private
|
|
||||||
OpenStack cloud with a replicated site that would be used for high
|
|
||||||
availability or disaster recovery. The most important design decision
|
|
||||||
is configuring storage as a single shared pool or separate pools,
|
|
||||||
depending on user and technical requirements.</para>
|
|
||||||
<figure xml:id="multi-site_arch">
|
|
||||||
<title>Multi-site OpenStack architecture</title>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="6in"
|
|
||||||
fileref="../figures/Multi-Site_shared_keystone_horizon_swift1.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
</figure>
|
|
||||||
<section xml:id="openstack-services-architecture">
|
|
||||||
<title>OpenStack services architecture</title>
|
|
||||||
<para>The Identity service, which is used by all other
|
|
||||||
OpenStack components for authorization and the catalog of
|
|
||||||
service endpoints, supports the concept of regions. A region
|
|
||||||
is a logical construct used to group OpenStack services in
|
|
||||||
close proximity to one another. The concept of
|
|
||||||
regions is flexible; a region may contain OpenStack service
|
|
||||||
endpoints located within a distinct geographic region or regions.
|
|
||||||
It may be smaller in scope, where a region is a single rack
|
|
||||||
within a data center, with multiple regions existing in adjacent
|
|
||||||
racks in the same data center.</para>
|
|
||||||
<para>The majority of OpenStack components are designed to run
|
|
||||||
within the context of a single region. The Compute
|
|
||||||
service is designed to manage compute resources within a region,
|
|
||||||
with support for subdivisions of compute resources by using
|
|
||||||
availability zones and cells. The Networking service
|
|
||||||
can be used to manage network resources in the same broadcast
|
|
||||||
domain or collection of switches that are linked. The OpenStack
|
|
||||||
Block Storage service controls storage resources within a region
|
|
||||||
with all storage resources residing on the same storage network.
|
|
||||||
Like the OpenStack Compute service, the OpenStack Block Storage
|
|
||||||
service also supports the availability zone construct which can
|
|
||||||
be used to subdivide storage resources.</para>
|
|
||||||
<para>The OpenStack dashboard, OpenStack Identity, and OpenStack
|
|
||||||
Object Storage services are components that can each be deployed
|
|
||||||
centrally in order to serve multiple regions.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="arch-multi-storage">
|
|
||||||
<title>Storage</title>
|
|
||||||
<para>With multiple OpenStack regions, it is recommended to configure
|
|
||||||
a single OpenStack Object Storage service endpoint to deliver
|
|
||||||
shared file storage for all regions. The Object Storage service
|
|
||||||
internally replicates files to multiple nodes which can be used
|
|
||||||
by applications or workloads in multiple regions. This simplifies
|
|
||||||
high availability failover and disaster recovery rollback.</para>
|
|
||||||
<para>In order to scale the Object Storage service to meet the workload
|
|
||||||
of multiple regions, multiple proxy workers are run and
|
|
||||||
load-balanced, storage nodes are installed in each region, and the
|
|
||||||
entire Object Storage service can be fronted by an HTTP caching
|
|
||||||
layer. This is done so client requests for objects can be served out
|
|
||||||
of caches rather than directly from the storage modules themselves,
|
|
||||||
reducing the actual load on the storage network. In addition to an
|
|
||||||
HTTP caching layer, use a caching layer like Memcache to cache
|
|
||||||
objects between the proxy and storage nodes.</para>
|
|
||||||
<para>If the cloud is designed with a separate Object Storage
|
|
||||||
service endpoint made available in each region, applications are
|
|
||||||
required to handle synchronization (if desired) and other management
|
|
||||||
operations to ensure consistency across the nodes. For some
|
|
||||||
applications, having multiple Object Storage service endpoints
|
|
||||||
located in the same region as the application may be desirable due
|
|
||||||
to reduced latency, cross-region bandwidth, and ease of
|
|
||||||
deployment.</para>
|
|
||||||
<note>
|
|
||||||
<para>For the Block Storage service, the most important decisions
|
|
||||||
are the selection of the storage technology, and whether
|
|
||||||
a dedicated network is used to carry storage traffic
|
|
||||||
from the storage service to the compute nodes.</para>
|
|
||||||
</note>
|
|
||||||
</section>
|
|
||||||
<section xml:id="arch-networking-multiple">
|
|
||||||
<title>Networking</title>
|
|
||||||
<para>When connecting multiple regions together, there are several design
|
|
||||||
considerations. The overlay network technology choice determines how
|
|
||||||
packets are transmitted between regions and how the logical network
|
|
||||||
and addresses are presented to the application. If there are security or
|
|
||||||
regulatory requirements, encryption should be implemented to secure
|
|
||||||
the traffic between regions. For networking inside a region, the
|
|
||||||
overlay network technology for tenant networks is equally important.
|
|
||||||
The overlay technology and the network traffic that an application
|
|
||||||
generates or receives can either be complementary or work at cross
|
|
||||||
purposes. For example, using an overlay technology for an application
|
|
||||||
that transmits a large number of small packets could add excessive
|
|
||||||
latency or overhead to each packet if not configured
|
|
||||||
properly.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="arch-dependencies-multiple">
|
|
||||||
<title>Dependencies</title>
|
|
||||||
<para>The architecture for a multi-site OpenStack installation
|
|
||||||
is dependent on a number of factors. One major dependency to
|
|
||||||
consider is storage. When designing the storage system, the
|
|
||||||
storage mechanism needs to be determined. Once the storage
|
|
||||||
type is determined, how it is accessed is critical. For example,
|
|
||||||
we recommend that storage use a dedicated network.
|
|
||||||
Another concern is how the storage is configured to protect
|
|
||||||
the data, for example, to meet the Recovery Point Objective (RPO) and
|
|
||||||
the Recovery Time Objective (RTO). How quickly recovery from
|
|
||||||
a fault can be completed determines how often the replication of
|
|
||||||
data is required. Ensure that enough storage is allocated to
|
|
||||||
support the data protection strategy.
|
|
||||||
</para>
|
|
||||||
<para>Networking decisions include the encapsulation mechanism that can
|
|
||||||
be used for the tenant networks, how large the broadcast domains
|
|
||||||
should be, and the contracted SLAs for the interconnects.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,180 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="operational-considerations-multi-site">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Operational considerations</title>
|
|
||||||
<para>Multi-site OpenStack cloud deployment using regions
|
|
||||||
requires that the service catalog contains per-region entries
|
|
||||||
for each service deployed other than the Identity service. Most
|
|
||||||
off-the-shelf OpenStack deployment tools have limited support
|
|
||||||
for defining multiple regions in this fashion.</para>
|
|
||||||
<para>Deployers should be aware of this and provide the appropriate
|
|
||||||
customization of the service catalog for their site either
|
|
||||||
manually, or by customizing deployment tools in use.</para>
|
|
||||||
<note><para>As of the Kilo release, documentation for
|
|
||||||
implementing this feature is in progress. See this bug for
|
|
||||||
more information:
|
|
||||||
<link
|
|
||||||
xlink:href="https://bugs.launchpad.net/openstack-manuals/+bug/1340509">https://bugs.launchpad.net/openstack-manuals/+bug/1340509</link>.
|
|
||||||
</para></note>
|
|
||||||
<section xml:id="licensing">
|
|
||||||
<title>Licensing</title>
|
|
||||||
<para>Multi-site OpenStack deployments present additional
|
|
||||||
licensing considerations over and above regular OpenStack
|
|
||||||
clouds, particularly where site licenses are in use to provide
|
|
||||||
cost efficient access to software licenses. The licensing for
|
|
||||||
host operating systems, guest operating systems, OpenStack
|
|
||||||
distributions (if applicable), software-defined infrastructure
|
|
||||||
including network controllers and storage systems, and even
|
|
||||||
individual applications need to be evaluated.</para>
|
|
||||||
<para>Topics to consider include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>The definition of what constitutes a site
|
|
||||||
in the relevant licenses, as the term does not
|
|
||||||
necessarily denote a geographic or otherwise
|
|
||||||
physically isolated location.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Differentiations between "hot" (active) and "cold"
|
|
||||||
(inactive) sites, where significant savings may be made
|
|
||||||
in situations where one site is a cold standby for
|
|
||||||
disaster recovery purposes only.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Certain locations might require local vendors to
|
|
||||||
provide support and services for each site which may vary
|
|
||||||
with the licensing agreement in place.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist></section>
|
|
||||||
<section xml:id="logging-and-monitoring-multi-site">
|
|
||||||
<title>Logging and monitoring</title>
|
|
||||||
<para>Logging and monitoring does not significantly differ for a
|
|
||||||
multi-site OpenStack cloud. The tools described in the <link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/content/logging_monitoring.html">Logging
|
|
||||||
and monitoring chapter</link> of the <citetitle>Operations
|
|
||||||
Guide</citetitle> remain applicable. Logging and monitoring
|
|
||||||
can be provided on a per-site basis, and in a common
|
|
||||||
centralized location.</para>
|
|
||||||
<para>When attempting to deploy logging and monitoring facilities
|
|
||||||
to a centralized location, care must be taken with the load
|
|
||||||
placed on the inter-site networking links.</para></section>
|
|
||||||
<section xml:id="upgrades-multi-site">
|
|
||||||
<title>Upgrades</title>
|
|
||||||
<para>In multi-site OpenStack clouds deployed using regions, sites
|
|
||||||
are independent OpenStack installations which are linked
|
|
||||||
together using shared centralized services such as OpenStack
|
|
||||||
Identity. At a high level the recommended order of operations
|
|
||||||
to upgrade an individual OpenStack environment is (see the <link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/content/ops_upgrades-general-steps.html">Upgrades
|
|
||||||
chapter</link> of the <citetitle>Operations Guide</citetitle>
|
|
||||||
for details):</para>
|
|
||||||
<orderedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade the OpenStack Identity service
|
|
||||||
(keystone).</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade the OpenStack Image service (glance).</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade OpenStack Compute (nova), including
|
|
||||||
networking components.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade OpenStack Block Storage (cinder).</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade the OpenStack dashboard (horizon).</para>
|
|
||||||
</listitem>
|
|
||||||
</orderedlist>
|
|
||||||
<para>The process for upgrading a multi-site environment is not
|
|
||||||
significantly different:</para>
|
|
||||||
<orderedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade the shared OpenStack Identity service
|
|
||||||
(keystone) deployment.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade the OpenStack Image service (glance) at each
|
|
||||||
site.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade OpenStack Compute (nova), including
|
|
||||||
networking components, at each site.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade OpenStack Block Storage (cinder) at each
|
|
||||||
site.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Upgrade the OpenStack dashboard (horizon), at each
|
|
||||||
site or in the single central location if it is
|
|
||||||
shared.</para>
|
|
||||||
</listitem>
|
|
||||||
</orderedlist>
|
|
||||||
<para>Compute upgrades within each site can also be performed in a rolling
|
|
||||||
fashion. Compute controller services (API, Scheduler, and
|
|
||||||
Conductor) can be upgraded prior to upgrading of individual
|
|
||||||
compute nodes. This allows operations staff to keep a site
|
|
||||||
operational for users of Compute services while performing an
|
|
||||||
upgrade.</para></section>
|
|
||||||
<section xml:id="quota-management-multi-site">
|
|
||||||
<title>Quota management</title>
|
|
||||||
<para>Quotas are used to set operational limits to prevent system
|
|
||||||
capacities from being exhausted without notification. They are
|
|
||||||
currently enforced at the tenant (or project) level rather than
|
|
||||||
at the user level.</para>
|
|
||||||
<para>Quotas are defined on a per-region basis. Operators can
|
|
||||||
define identical quotas for tenants in each region of the
|
|
||||||
cloud to provide a consistent experience, or even create a
|
|
||||||
process for synchronizing allocated quotas across regions. It
|
|
||||||
is important to note that only the operational limits imposed
|
|
||||||
by the quotas will be aligned; consumption of quotas by users
|
|
||||||
will not be reflected between regions.</para>
|
|
||||||
<para>For example, given a cloud with two regions, if the operator
|
|
||||||
grants a user a quota of 25 instances in each region then that
|
|
||||||
user may launch a total of 50 instances spread across both
|
|
||||||
regions. They may not, however, launch more than 25 instances
|
|
||||||
in any single region.</para>
|
|
||||||
<para>For more information on managing quotas refer to the
|
|
||||||
<link
|
|
||||||
xlink:href="http://docs.openstack.org/openstack-ops/content/projects_users.html">Managing
|
|
||||||
projects and users chapter</link> of the <citetitle>OpenStack
|
|
||||||
Operations Guide</citetitle>.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="policy-management-multi-site">
|
|
||||||
<title>Policy management</title>
|
|
||||||
<para>OpenStack provides a default set of Role Based Access
|
|
||||||
Control (RBAC) policies, defined in a <filename>policy.json</filename> file, for
|
|
||||||
each service. Operators edit these files to customize the
|
|
||||||
policies for their OpenStack installation. If the application
|
|
||||||
of consistent RBAC policies across sites is a requirement, then
|
|
||||||
it is necessary to ensure proper synchronization of the
|
|
||||||
<filename>policy.json</filename> files to all installations.</para>
|
|
||||||
<para>This must be done using system administration tools
|
|
||||||
such as rsync as functionality for synchronizing policies
|
|
||||||
across regions is not currently provided within OpenStack.</para></section>
|
|
||||||
<section xml:id="documentation-multi-site">
|
|
||||||
<title>Documentation</title>
|
|
||||||
<para>Users must be able to leverage cloud infrastructure and
|
|
||||||
provision new resources in the environment. It is important
|
|
||||||
that user documentation is accessible by users to ensure they
|
|
||||||
are given sufficient information to help them leverage the cloud.
|
|
||||||
As an example, by default OpenStack schedules instances on a compute node
|
|
||||||
automatically. However, when multiple regions are available,
|
|
||||||
the end user needs to decide in which region to schedule the
|
|
||||||
new instance. The dashboard presents the user with
|
|
||||||
the first region in your configuration. The API and CLI tools
|
|
||||||
do not execute commands unless a valid region is specified.
|
|
||||||
It is therefore important to provide documentation to your
|
|
||||||
users describing the region layout as well as calling out that
|
|
||||||
quotas are region-specific. If a user reaches his or her quota
|
|
||||||
in one region, OpenStack does not automatically build new
|
|
||||||
instances in another. Documenting specific examples helps
|
|
||||||
users understand how to operate the cloud, thereby reducing
|
|
||||||
calls and tickets filed with the help desk.</para></section>
|
|
||||||
</section>
|
|
@ -1,236 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE section [
|
|
||||||
<!ENTITY % openstack SYSTEM "../../common/entities/openstack.ent">
|
|
||||||
%openstack;
|
|
||||||
]>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="prescriptive-example-multisite">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Prescriptive examples</title>
|
|
||||||
<para>There are multiple ways to build a multi-site OpenStack
|
|
||||||
installation, based on the needs of the intended workloads.
|
|
||||||
Below are example architectures based on different
|
|
||||||
requirements. These examples are meant as a reference, and not
|
|
||||||
a hard and fast rule for deployments. Use the previous
|
|
||||||
sections of this chapter to assist in selecting specific
|
|
||||||
components and implementations based on specific needs.</para>
|
|
||||||
<para>A large content provider needs to deliver content to
|
|
||||||
customers that are geographically dispersed. The workload is
|
|
||||||
very sensitive to latency and needs a rapid response to
|
|
||||||
end-users. After reviewing the user, technical and operational
|
|
||||||
considerations, it is determined beneficial to build a number
|
|
||||||
of regions local to the customer's edge. Rather than build a
|
|
||||||
few large, centralized data centers, the intent of the architecture
|
|
||||||
is to provide a pair of small data centers in locations that
|
|
||||||
are closer to the customer. In this use
|
|
||||||
case, spreading applications out allows for different
|
|
||||||
horizontal scaling than a traditional compute workload scale.
|
|
||||||
The intent is to scale by creating more copies of the
|
|
||||||
application in closer proximity to the users that need it
|
|
||||||
most, in order to ensure faster response time to user
|
|
||||||
requests. This provider deploys two data centers at each of
|
|
||||||
the four chosen regions. The implications of this design are
|
|
||||||
based around the method of placing copies of resources in each
|
|
||||||
of the remote regions. Swift objects, Glance images, and block
|
|
||||||
storage need to be manually replicated into each region.
|
|
||||||
This may be beneficial for some systems, such as the case of
|
|
||||||
content service, where only some of the content needs to exist
|
|
||||||
in some but not all regions. A centralized Keystone is
|
|
||||||
recommended to ensure authentication and that access to the
|
|
||||||
API endpoints is easily manageable.</para>
|
|
||||||
<para>It is recommended that you install an automated DNS system such
|
|
||||||
as Designate. Application administrators need a way to
|
|
||||||
manage the mapping of which application copy exists in each
|
|
||||||
region and how to reach it, unless an external Dynamic DNS system
|
|
||||||
is available. Designate assists by making the process automatic
|
|
||||||
and by populating the records in each region's zone.</para>
|
|
||||||
<para>Telemetry for each region is also deployed, as each region
|
|
||||||
may grow differently or be used at a different rate.
|
|
||||||
Ceilometer collects each region's meters from each
|
|
||||||
of the controllers and reports them back to a central location.
|
|
||||||
This is useful both to the end user and the administrator of
|
|
||||||
the OpenStack environment. The end user will find this method
|
|
||||||
useful, as it makes it possible to determine if certain
|
|
||||||
locations are experiencing higher load than others, and take
|
|
||||||
appropriate action. Administrators also benefit by
|
|
||||||
possibly being able to forecast growth per region, rather than
|
|
||||||
expanding the capacity of all regions simultaneously,
|
|
||||||
therefore maximizing the cost-effectiveness of the multi-site
|
|
||||||
design.</para>
|
|
||||||
<para>One of the key decisions of running this infrastructure is
|
|
||||||
whether or not to provide a redundancy
|
|
||||||
model. Two types of redundancy and high availability models in
|
|
||||||
this configuration can be implemented. The first type
|
|
||||||
is the availability of central OpenStack
|
|
||||||
components. Keystone can be made highly available in three
|
|
||||||
central data centers that host the centralized OpenStack
|
|
||||||
components. This prevents a loss of any one of the regions
|
|
||||||
causing an outage in service. It also has the added benefit of
|
|
||||||
being able to run a central storage repository as a primary
|
|
||||||
cache for distributing content to each of the regions.</para>
|
|
||||||
<para>The second redundancy type is the edge data center itself.
|
|
||||||
A second data center in each of the edge regional
|
|
||||||
locations houses a second region near the first region. This
|
|
||||||
ensures that the application does not suffer degraded
|
|
||||||
performance in terms of latency and availability.</para>
|
|
||||||
<para><xref linkend="multi-site_customer_edge"/> depicts
|
|
||||||
the solution designed to have both a centralized set of core
|
|
||||||
data centers for OpenStack services and paired edge data centers:</para>
|
|
||||||
<figure xml:id="multi-site_customer_edge">
|
|
||||||
<title>Multi-site architecture example</title>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="6in"
|
|
||||||
fileref="../figures/Multi-Site_Customer_Edge.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
</figure>
|
|
||||||
<section xml:id="geo-redundant-load-balancing">
|
|
||||||
<title>Geo-redundant load balancing</title>
|
|
||||||
<para>A large-scale web application has been designed with cloud
|
|
||||||
principles in mind. The application is designed to provide
|
|
||||||
service to an application store on a 24/7 basis. The company has
|
|
||||||
a typical two-tier architecture with a web front-end servicing the
|
|
||||||
customer requests, and a NoSQL database back end storing the
|
|
||||||
information.</para>
|
|
||||||
<para>As of late there have been several outages in a number of major
|
|
||||||
public cloud providers due to applications running out of
|
|
||||||
a single geographical location. The design therefore should
|
|
||||||
mitigate the chance of a single site causing an outage for their
|
|
||||||
business.</para>
|
|
||||||
<para>The solution would consist of the following OpenStack
|
|
||||||
components:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>A firewall, switches and load balancers on the
|
|
||||||
public facing network connections.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Controller services, Networking,
|
|
||||||
dashboard, Block Storage and Compute running locally in
|
|
||||||
each of the three regions. Identity service, Orchestration
|
|
||||||
service, Telemetry service, Image service and
|
|
||||||
Object Storage service can be installed centrally, with
|
|
||||||
nodes in each of the regions providing a redundant
|
|
||||||
OpenStack Controller plane throughout the globe.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Compute nodes running the KVM
|
|
||||||
hypervisor.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Object Storage for serving static objects
|
|
||||||
such as images can be used to ensure that all images
|
|
||||||
are standardized across all the regions, and
|
|
||||||
replicated on a regular basis.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A distributed DNS service available to all
|
|
||||||
regions that allows for dynamic update of DNS
|
|
||||||
records of deployed instances.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A geo-redundant load balancing service can be used
|
|
||||||
to service the requests from the customers based on
|
|
||||||
their origin.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>An autoscaling heat template can be used to deploy the
|
|
||||||
application in the three regions. This template includes:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Web Servers, running Apache.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Appropriate <literal>user_data</literal> to populate the central DNS
|
|
||||||
servers upon instance launch.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Appropriate Telemetry alarms that maintain state of
|
|
||||||
the application and allow for handling of region or
|
|
||||||
instance failure.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Another autoscaling Heat template can be used to deploy a
|
|
||||||
distributed MongoDB shard over the three locations, with the
|
|
||||||
option of storing required data on a globally available swift
|
|
||||||
container. According to the usage and load on the database
|
|
||||||
server, additional shards can be provisioned according to
|
|
||||||
the thresholds defined in Telemetry.</para>
|
|
||||||
<!-- <para>The reason that three regions were selected here was because of
|
|
||||||
the fear of having abnormal load on a single region in the
|
|
||||||
event of a failure. Two data center would have been sufficient
|
|
||||||
had the requirements been met.</para>-->
|
|
||||||
<para>Two data centers would have been sufficient had the requirements
|
|
||||||
been met. But three regions are selected here to avoid abnormal
|
|
||||||
load on a single region in the event of a failure.</para>
|
|
||||||
<para>Orchestration is used because of the built-in functionality of
|
|
||||||
autoscaling and auto healing in the event of increased load.
|
|
||||||
Additional configuration management tools, such as Puppet or
|
|
||||||
Chef could also have been used in this scenario, but were not
|
|
||||||
chosen since Orchestration had the appropriate built-in
|
|
||||||
hooks into the OpenStack cloud, whereas the other tools were
|
|
||||||
external and not native to OpenStack. In addition, external
|
|
||||||
tools were not needed since this deployment scenario was
|
|
||||||
straightforward.</para>
|
|
||||||
<para>OpenStack Object Storage is used here to serve as a back end for
|
|
||||||
the Image service since it is the most suitable solution for a
|
|
||||||
globally distributed storage solution with its own
|
|
||||||
replication mechanism. Home grown solutions could also have
|
|
||||||
been used including the handling of replication, but were not
|
|
||||||
chosen, because Object Storage is already an integral part of the
|
|
||||||
infrastructure and a proven solution.</para>
|
|
||||||
<para>An external load balancing service was used and not the
|
|
||||||
LBaaS in OpenStack because the solution in OpenStack is not
|
|
||||||
redundant and does not have any awareness of geo location.</para>
|
|
||||||
<figure xml:id="multi-site_geo_redundant">
|
|
||||||
<title>Multi-site geo-redundant architecture</title>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="6in"
|
|
||||||
fileref="../figures/Multi-site_Geo_Redundant_LB.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
</figure>
|
|
||||||
</section>
|
|
||||||
<section xml:id="location-local-services">
|
|
||||||
<title>Location-local service</title>
|
|
||||||
<para>A common use for multi-site OpenStack deployment is
|
|
||||||
creating a Content Delivery Network. An application that
|
|
||||||
uses a location-local architecture requires low network
|
|
||||||
latency and proximity to the user to provide an
|
|
||||||
optimal user experience and reduce the cost of bandwidth and
|
|
||||||
transit. The content resides on sites closer to the customer,
|
|
||||||
instead of a centralized content store that requires utilizing
|
|
||||||
higher cost cross-country links.</para>
|
|
||||||
<para>This architecture includes a geo-location component
|
|
||||||
that places user requests to the closest possible node. In
|
|
||||||
this scenario, 100% redundancy of content across every site is
|
|
||||||
a goal rather than a requirement, with the intent to
|
|
||||||
maximize the amount of content available within a
|
|
||||||
minimum number of network hops for end users. Despite
|
|
||||||
these differences, the storage replication configuration has
|
|
||||||
significant overlap with that of a geo-redundant load
|
|
||||||
balancing use case.</para>
|
|
||||||
<para>In <xref linkend="multi-site_shared_shared_keystone"/>,
|
|
||||||
the application utilizing this multi-site OpenStack install
|
|
||||||
that is location-aware would launch web server or content
|
|
||||||
serving instances on the compute cluster in each site. Requests
|
|
||||||
from clients are first sent to a global services load balancer
|
|
||||||
that determines the location of the client, then routes the
|
|
||||||
request to the closest OpenStack site where the application
|
|
||||||
completes the request.</para>
|
|
||||||
<figure xml:id="multi-site_shared_shared_keystone">
|
|
||||||
<title>Multi-site shared keystone architecture</title>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="6in"
|
|
||||||
fileref="../figures/Multi-Site_shared_keystone1.png"/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
</figure>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,176 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="technical-considerations-multi-site">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Technical considerations</title>
|
|
||||||
<para>There are many technical considerations to take into account
|
|
||||||
with regard to designing a multi-site OpenStack
|
|
||||||
implementation. An OpenStack cloud can be designed in a
|
|
||||||
variety of ways to handle individual application needs. A
|
|
||||||
multi-site deployment has additional challenges compared
|
|
||||||
to single site installations and therefore is a more
|
|
||||||
complex solution.</para>
|
|
||||||
<para>When determining capacity options be sure to take into
|
|
||||||
account not just the technical issues, but also the economic
|
|
||||||
or operational issues that might arise from specific
|
|
||||||
decisions.</para>
|
|
||||||
<para>Inter-site link capacity describes the capabilities of the
|
|
||||||
connectivity between the different OpenStack sites. This
|
|
||||||
includes parameters such as bandwidth, latency, whether or not
|
|
||||||
a link is dedicated, and any business policies applied to the
|
|
||||||
connection. The capability and number of the links between
|
|
||||||
sites determine what kind of options are available for
|
|
||||||
deployment. For example, if two sites have a pair of
|
|
||||||
high-bandwidth links available between them, it may be wise to
|
|
||||||
configure a separate storage replication network between the
|
|
||||||
two sites to support a single Swift endpoint and a shared
|
|
||||||
Object Storage capability between them. An example of this
|
|
||||||
technique, as well as a configuration walk-through, is
|
|
||||||
available at <link
|
|
||||||
xlink:href="http://docs.openstack.org/developer/swift/replication_network.html#dedicated-replication-network">http://docs.openstack.org/developer/swift/replication_network.html#dedicated-replication-network</link>.
|
|
||||||
Another option in this scenario is to build a dedicated set of
|
|
||||||
tenant private networks across the secondary link, using
|
|
||||||
overlay networks with a third party mapping the site overlays
|
|
||||||
to each other.</para>
|
|
||||||
<para>The capacity requirements of the links between sites are
|
|
||||||
driven by application behavior. If the link latency is
|
|
||||||
too high, certain applications that use a large number of
|
|
||||||
small packets, for example RPC calls, may encounter issues
|
|
||||||
communicating with each other or operating properly.
|
|
||||||
Additionally, OpenStack may encounter similar types of issues.
|
|
||||||
To mitigate this, Identity service call timeouts can be
|
|
||||||
tuned to prevent issues authenticating against a central
|
|
||||||
Identity service.</para>
|
|
||||||
<para>Another network capacity consideration for a multi-site
|
|
||||||
deployment is the number and performance of overlay networks
|
|
||||||
available for tenant networks. If using shared tenant networks
|
|
||||||
across zones, it is imperative that an external overlay manager
|
|
||||||
or controller be used to map these overlays together. It is
|
|
||||||
necessary to ensure the number of possible IDs between the zones
|
|
||||||
is identical.</para>
|
|
||||||
<note>
|
|
||||||
<para>As of the Kilo release, OpenStack Networking was not
|
|
||||||
capable of managing tunnel IDs across installations. So if
|
|
||||||
one site runs out of IDs, but another does not, that tenant's
|
|
||||||
network is unable to reach the other site.</para>
|
|
||||||
</note>
|
|
||||||
<para>Capacity can take other forms as well. The ability for a
|
|
||||||
region to grow depends on scaling out the number of available
|
|
||||||
compute nodes. This topic is covered in greater detail in the
|
|
||||||
section for compute-focused deployments. However, it may be
|
|
||||||
necessary to grow cells in an individual region, depending on
|
|
||||||
the size of your cluster and the ratio of virtual machines per
|
|
||||||
hypervisor.</para>
|
|
||||||
<para>A third form of capacity comes in the multi-region-capable
|
|
||||||
components of OpenStack. Centralized Object Storage is capable
|
|
||||||
of serving objects through a single namespace across multiple
|
|
||||||
regions. Since this works by accessing the object store through
|
|
||||||
swift proxy, it is possible to overload the proxies. There are
|
|
||||||
two options available to mitigate this issue:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Deploy a large number of swift proxies. The drawback is
|
|
||||||
that the proxies are not load-balanced and a large file
|
|
||||||
request could continually hit the same proxy.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Add a caching HTTP proxy and load balancer in front of
|
|
||||||
the swift proxies. Since swift objects are returned to the
|
|
||||||
requester via HTTP, this load balancer would alleviate the
|
|
||||||
load required on the swift proxies.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<section xml:id="utilization-multi-site"><title>Utilization</title>
|
|
||||||
<para>While constructing a multi-site OpenStack environment is the
|
|
||||||
goal of this guide, the real test is whether an application
|
|
||||||
can utilize it.</para>
|
|
||||||
<para>The Identity service is normally the first interface for
|
|
||||||
OpenStack users and is required for almost all major operations
|
|
||||||
within OpenStack. Therefore, it is important that you provide users
|
|
||||||
with a single URL for Identity service authentication, and
|
|
||||||
document the configuration of regions within the Identity service.
|
|
||||||
Each of the sites defined in your installation is considered
|
|
||||||
to be a region in Identity nomenclature. This is important for
|
|
||||||
the users, as it is required to define the region name when
|
|
||||||
providing actions to an API endpoint or in the dashboard.</para>
|
|
||||||
<para>Load balancing is another common issue with multi-site
|
|
||||||
installations. While it is still possible to run HAProxy
|
|
||||||
instances with Load-Balancer-as-a-Service, these are defined
|
|
||||||
to a specific region. Some applications can manage this using
|
|
||||||
internal mechanisms. Other applications may require the
|
|
||||||
implementation of an external system, including global services
|
|
||||||
load balancers or anycast-advertised DNS.</para>
|
|
||||||
<para>Depending on the storage model chosen during site design,
|
|
||||||
storage replication and availability are also a concern
|
|
||||||
for end-users. If an application can support regions, then it
|
|
||||||
is possible to keep the object storage system separated by region.
|
|
||||||
In this case, users who want to have an object available to
|
|
||||||
more than one region need to perform cross-site replication.
|
|
||||||
However, with a centralized swift proxy, the user may need to
|
|
||||||
benchmark the replication timing of the Object Storage back end.
|
|
||||||
Benchmarking allows the operational staff to provide users with
|
|
||||||
an understanding of the amount of time required for a stored or
|
|
||||||
modified object to become available to the entire environment.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="performance"><title>Performance</title>
|
|
||||||
<para>Determining the performance of a multi-site installation
|
|
||||||
involves considerations that do not come into play in a
|
|
||||||
single-site deployment. Being a distributed deployment,
|
|
||||||
performance in multi-site deployments may be affected in certain
|
|
||||||
situations.</para>
|
|
||||||
<para>Since multi-site systems can be geographically separated,
|
|
||||||
there may be greater latency or jitter when communicating across
|
|
||||||
regions. This can especially impact systems like the OpenStack
|
|
||||||
Identity service when making authentication attempts from regions
|
|
||||||
that do not contain the centralized Identity implementation. It
|
|
||||||
can also affect applications which rely on Remote Procedure Call (RPC)
|
|
||||||
for normal operation. An example of this can be seen in high
|
|
||||||
performance computing workloads.</para>
|
|
||||||
<para>Storage availability can also be impacted by the
|
|
||||||
architecture of a multi-site deployment. A centralized Object
|
|
||||||
Storage service requires more time for an object to be
|
|
||||||
available to instances locally in regions where the object was
|
|
||||||
not created. Some applications may need to be tuned to account
|
|
||||||
for this effect. Block Storage does not currently have a
|
|
||||||
method for replicating data across multiple regions, so
|
|
||||||
applications that depend on available block storage need
|
|
||||||
to manually cope with this limitation by creating duplicate
|
|
||||||
block storage entries in each region.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="openstack-components_multi-site">
|
|
||||||
<title>OpenStack components</title>
|
|
||||||
<para>Most OpenStack installations require a bare minimum set of
|
|
||||||
pieces to function. These include the OpenStack Identity
|
|
||||||
(keystone) for authentication, OpenStack Compute
|
|
||||||
(nova) for compute, OpenStack Image service (glance) for image
|
|
||||||
storage, OpenStack Networking (neutron) for networking, and
|
|
||||||
potentially an object store in the form of OpenStack Object
|
|
||||||
Storage (swift). Deploying a multi-site installation also demands extra
|
|
||||||
components in order to coordinate between regions. A centralized
|
|
||||||
Identity service is necessary to provide the single authentication
|
|
||||||
point. A centralized dashboard is also recommended to provide a
|
|
||||||
single login point and a mapping to the API and CLI
|
|
||||||
options available. A centralized Object Storage service may also
|
|
||||||
be used, but will require the installation of the swift proxy
|
|
||||||
service.</para>
|
|
||||||
<para>It may also be helpful to install a few extra options in
|
|
||||||
order to facilitate certain use cases. For example,
|
|
||||||
installing Designate may assist in automatically generating
|
|
||||||
DNS domains for each region with an automatically-populated
|
|
||||||
zone full of resource records for each instance. This
|
|
||||||
facilitates using DNS as a mechanism for determining which
|
|
||||||
region will be selected for certain applications.</para>
|
|
||||||
<para>Another useful tool for managing a multi-site installation
|
|
||||||
is Orchestration (heat). The Orchestration service allows the
|
|
||||||
use of templates to define a set of instances to be launched
|
|
||||||
together or for scaling existing sets. It can also be used to
|
|
||||||
set up matching or differentiated groupings based on
|
|
||||||
regions. For instance, if an application requires an equally
|
|
||||||
balanced number of nodes across sites, the same heat template
|
|
||||||
can be used to cover each site with small alterations to only
|
|
||||||
the region name.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,176 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="user-requirements-multi-site">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>User requirements</title>
|
|
||||||
<section xml:id="workload-characteristics">
|
|
||||||
<title>Workload characteristics</title>
|
|
||||||
<para>An understanding of the expected workloads for a desired
|
|
||||||
multi-site environment and use case is an important factor in
|
|
||||||
the decision-making process. In this context, <literal>workload</literal>
|
|
||||||
refers to the way the systems are used. A workload could be a
|
|
||||||
single application or a suite of applications that work together.
|
|
||||||
It could also be a duplicate set of applications that need to
|
|
||||||
run in multiple cloud environments. Often in a multi-site deployment,
|
|
||||||
the same workload will need to work identically in more than one
|
|
||||||
physical location.</para>
|
|
||||||
<para>This multi-site scenario likely includes one or more of the
|
|
||||||
other scenarios in this book with the additional requirement
|
|
||||||
of having the workloads in two or more locations. The
|
|
||||||
following are some possible scenarios:</para>
|
|
||||||
<para>For many use cases the proximity of the user to their
|
|
||||||
workloads has a direct influence on the performance of the
|
|
||||||
application and therefore should be taken into consideration
|
|
||||||
in the design. Certain applications require zero to minimal
|
|
||||||
latency that can only be achieved by deploying the cloud in
|
|
||||||
multiple locations. These locations could be in different data
|
|
||||||
centers, cities, countries or geographical regions, depending
|
|
||||||
on the user requirement and location of the users.</para></section>
|
|
||||||
<section xml:id="consistency-images-templates-across-sites">
|
|
||||||
<title>Consistency of images and templates across different
|
|
||||||
sites</title>
|
|
||||||
<para>It is essential that the deployment of instances is
|
|
||||||
consistent across the different sites and built
|
|
||||||
into the infrastructure. If the OpenStack Object Storage is used as
|
|
||||||
a back end for the Image service, it is possible to create repositories
|
|
||||||
of consistent images across multiple sites. Having central
|
|
||||||
endpoints with multiple storage nodes allows consistent centralized
|
|
||||||
storage for every site.</para>
|
|
||||||
<para>Not using a centralized object store increases the operational
|
|
||||||
overhead of maintaining a consistent image library. This
|
|
||||||
could include development of a replication mechanism to handle
|
|
||||||
the transport of images and the changes to the images across
|
|
||||||
multiple sites.</para></section>
|
|
||||||
<section xml:id="high-availability-multi-site">
|
|
||||||
<title>High availability</title>
|
|
||||||
<para>If high availability is a requirement to provide continuous
|
|
||||||
infrastructure operations, a basic requirement of high
|
|
||||||
availability should be defined.</para>
|
|
||||||
<para>The OpenStack management components need to have a basic and
|
|
||||||
minimal level of redundancy. The simplest example is that the loss
|
|
||||||
of any single site should have minimal impact on the
|
|
||||||
availability of the OpenStack services.</para>
|
|
||||||
<para>The <link
|
|
||||||
xlink:href="http://docs.openstack.org/ha-guide/"><citetitle>OpenStack
|
|
||||||
High Availability Guide</citetitle></link>
|
|
||||||
contains more information on how to provide redundancy for the
|
|
||||||
OpenStack components.</para>
|
|
||||||
<para>Multiple network links should be deployed between sites to
|
|
||||||
provide redundancy for all components. This includes storage
|
|
||||||
replication, which should be isolated to a dedicated network
|
|
||||||
or VLAN with the ability to assign QoS to control the
|
|
||||||
replication traffic or provide priority for this traffic. Note
|
|
||||||
that if the data store is highly changeable, the network
|
|
||||||
requirements could have a significant effect on the
|
|
||||||
operational cost of maintaining the sites.</para>
|
|
||||||
<para>The ability to maintain object availability in both sites
|
|
||||||
has significant implications on the object storage design and
|
|
||||||
implementation. It also has a significant impact on the
|
|
||||||
WAN network design between the sites.</para>
|
|
||||||
<para>Connecting more than two sites increases the challenges and
|
|
||||||
adds more complexity to the design considerations. Multi-site
|
|
||||||
implementations require planning to address the additional
|
|
||||||
topology used for internal and external connectivity. Some options
|
|
||||||
include full mesh topology, hub spoke, spine leaf, and 3D Torus.</para>
|
|
||||||
<para>If applications running in a cloud are not cloud-aware, there
|
|
||||||
should be clear measures and expectations to define what the
|
|
||||||
infrastructure can and cannot support. An example would be
|
|
||||||
shared storage between sites. It is possible; however, such a
|
|
||||||
solution is not native to OpenStack and requires a third-party
|
|
||||||
hardware vendor to fulfill such a requirement. Another example
|
|
||||||
can be seen in applications that are able to consume resources
|
|
||||||
in object storage directly. These applications need to be
|
|
||||||
cloud-aware to make good use of an OpenStack Object
|
|
||||||
Store.</para></section>
|
|
||||||
<section xml:id="application-readiness">
|
|
||||||
<title>Application readiness</title>
|
|
||||||
<para>Some applications are tolerant of the lack of synchronized
|
|
||||||
object storage, while others may need those objects to be
|
|
||||||
replicated and available across regions. Understanding how
|
|
||||||
the cloud implementation impacts new and existing applications
|
|
||||||
is important for risk mitigation, and the overall success of a
|
|
||||||
cloud project. Applications may have to be written or rewritten
|
|
||||||
for an infrastructure with little to no redundancy, or with the
|
|
||||||
cloud in mind.</para></section>
|
|
||||||
<section xml:id="cost-multi-site">
|
|
||||||
<title>Cost</title>
|
|
||||||
<para>A greater number of sites increases cost and complexity for a
|
|
||||||
multi-site deployment. Costs can be broken down into the following
|
|
||||||
categories:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Compute resources</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Networking resources</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Replication</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Storage</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Management</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Operational costs</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist></section>
|
|
||||||
<section xml:id="site-loss-and-recovery">
|
|
||||||
<title>Site loss and recovery</title>
|
|
||||||
<para>Outages can cause partial or full loss of site functionality.
|
|
||||||
Strategies should be implemented to understand and plan for recovery
|
|
||||||
scenarios.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>The deployed applications need to continue to
|
|
||||||
function and, more importantly, you must consider the
|
|
||||||
impact on the performance and reliability of the application
|
|
||||||
when a site is unavailable.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>It is important to understand what happens to the
|
|
||||||
replication of objects and data between the sites when
|
|
||||||
a site goes down. If this causes queues to start
|
|
||||||
building up, consider how long these queues can
|
|
||||||
safely exist until an error occurs.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>After an outage, ensure the method for resuming proper
|
|
||||||
operations of a site is implemented when it comes back online.
|
|
||||||
We recommend you architect the recovery to avoid race conditions.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist></section>
|
|
||||||
<section xml:id="compliance-and-geo-location-multi-site">
|
|
||||||
<title>Compliance and geo-location</title>
|
|
||||||
<para>An organization may have certain legal obligations and
|
|
||||||
regulatory compliance measures which could require certain
|
|
||||||
workloads or data to not be located in certain regions.</para></section>
|
|
||||||
<section xml:id="auditing-multi-site">
|
|
||||||
<title>Auditing</title>
|
|
||||||
<para>A well thought-out auditing strategy is important in order
|
|
||||||
to be able to quickly track down issues. Keeping track of
|
|
||||||
changes made to security groups and tenant changes can be
|
|
||||||
useful in rolling back the changes if they affect production.
|
|
||||||
For example, if all security group rules for a tenant
|
|
||||||
disappeared, the ability to quickly track down the issue would
|
|
||||||
be important for operational and legal reasons.</para></section>
|
|
||||||
<section xml:id="separation-of-duties">
|
|
||||||
<title>Separation of duties</title>
|
|
||||||
<para>A common requirement is to define different roles for the
|
|
||||||
different cloud administration functions. An example would be
|
|
||||||
a requirement to segregate the duties and permissions by
|
|
||||||
site.</para></section>
|
|
||||||
<section xml:id="authentication-between-sites">
|
|
||||||
<title>Authentication between sites</title>
|
|
||||||
<para>It is recommended to have a single authentication domain
|
|
||||||
rather than a separate implementation for each and every
|
|
||||||
site. This requires an authentication mechanism that is highly
|
|
||||||
available and distributed to ensure continuous operation.
|
|
||||||
Authentication server locality might be required and should be
|
|
||||||
planned for.</para></section>
|
|
||||||
</section>
|
|
@ -1,184 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="architecture-network-focus">
|
|
||||||
<title>Architecture</title>
|
|
||||||
<para>Network-focused OpenStack architectures have many similarities to
|
|
||||||
other OpenStack architecture use cases. There are several factors
|
|
||||||
to consider when designing for a network-centric or network-heavy
|
|
||||||
application environment.</para>
|
|
||||||
<para>Networks exist to serve as a medium of transporting data between
|
|
||||||
systems. It is inevitable that an OpenStack design has inter-dependencies
|
|
||||||
with non-network portions of OpenStack as well as on external systems.
|
|
||||||
Depending on the specific workload, there may be major interactions with
|
|
||||||
storage systems both within and external to the OpenStack environment.
|
|
||||||
For example, in the case of a content delivery network, there is twofold
|
|
||||||
interaction with storage. Traffic flows to and from the storage array for
|
|
||||||
ingesting and serving content in a north-south direction. In addition,
|
|
||||||
there is replication traffic flowing in an east-west direction.</para>
|
|
||||||
<para>Compute-heavy workloads may also induce interactions with the
|
|
||||||
network. Some high performance compute applications require network-based
|
|
||||||
memory mapping and data sharing and, as a result, induce a higher network
|
|
||||||
load when they transfer results and data sets. Others may be highly
|
|
||||||
transactional and issue transaction locks, perform their functions, and
|
|
||||||
revoke transaction locks at high rates. This also has an impact on the
|
|
||||||
network performance.</para>
|
|
||||||
<para>Some network dependencies are external to OpenStack. While
|
|
||||||
OpenStack Networking is capable of providing network ports, IP addresses,
|
|
||||||
some level of routing, and overlay networks, there are some other
|
|
||||||
functions that it cannot provide. For many of these, you may require
|
|
||||||
external systems or equipment to fill in the functional gaps. Hardware
|
|
||||||
load balancers are an example of equipment that may be necessary to
|
|
||||||
distribute workloads or offload certain functions. OpenStack Networking
|
|
||||||
provides a tunneling feature; however, it is constrained to a
|
|
||||||
Networking-managed region. If the need arises to extend a tunnel beyond
|
|
||||||
the OpenStack region to either another region or an external system,
|
|
||||||
implement the tunnel itself outside OpenStack or use a tunnel management
|
|
||||||
system to map the tunnel or overlay to an external tunnel.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Depending on the selected design, Networking itself might not
|
|
||||||
support the required <glossterm baseform="Layer-3 network">layer-3
|
|
||||||
network</glossterm> functionality. If you choose to use the
|
|
||||||
provider networking mode without running the layer-3 agent, you
|
|
||||||
must install an external router to provide layer-3 connectivity
|
|
||||||
to outside systems.
|
|
||||||
</para>
|
|
||||||
<para>Interaction with orchestration services is inevitable in
|
|
||||||
larger-scale deployments. The Orchestration service is capable of
|
|
||||||
allocating network resources defined in templates to map to tenant
|
|
||||||
networks and for port creation, as well as allocating floating IPs.
|
|
||||||
If there is a requirement to define and manage network resources when
|
|
||||||
using orchestration, we recommend that the design include the
|
|
||||||
Orchestration service to meet the demands of users.</para>
|
|
||||||
<section xml:id="design-impacts">
|
|
||||||
<title>Design impacts</title>
|
|
||||||
<para>A wide variety of factors can affect a network-focused OpenStack
|
|
||||||
architecture. While there are some considerations shared with a general
|
|
||||||
use case, specific workloads related to network requirements influence
|
|
||||||
network design decisions.</para>
|
|
||||||
<para>One decision includes whether or not to use Network Address
|
|
||||||
Translation (NAT) and where to implement it. If there is a requirement
|
|
||||||
for floating IPs instead of public fixed addresses then you must use
|
|
||||||
NAT. An example of this is a DHCP relay that must know the IP of the
|
|
||||||
DHCP server. In these cases it is easier to automate the infrastructure
|
|
||||||
to apply the target IP to a new instance rather than to reconfigure
|
|
||||||
legacy or external systems for each new instance.</para>
|
|
||||||
<para>NAT for floating IPs managed by Networking resides within the
|
|
||||||
hypervisor but there are also versions of NAT that may be running
|
|
||||||
elsewhere. If there is a shortage of IPv4 addresses there are two common
|
|
||||||
methods to mitigate this externally to OpenStack. The first is to run a
|
|
||||||
load balancer either within OpenStack as an instance, or use an external
|
|
||||||
load balancing solution. In the internal scenario, Networking's
|
|
||||||
Load-Balancer-as-a-Service (LBaaS) can manage load balancing
|
|
||||||
software, for example HAProxy. This is specifically to manage the
|
|
||||||
Virtual IP (VIP) while a dual-homed connection from the HAProxy instance
|
|
||||||
connects the public network with the tenant private network that hosts
|
|
||||||
all of the content servers. In the external scenario, a load balancer
|
|
||||||
needs to serve the VIP and also connect to the tenant overlay
|
|
||||||
network through external means or through private addresses.</para>
|
|
||||||
<para>Another kind of NAT that may be useful is protocol NAT. In some
|
|
||||||
cases it may be desirable to use only IPv6 addresses on instances and
|
|
||||||
operate either an instance or an external service to provide a NAT-based
|
|
||||||
transition technology such as NAT64 and DNS64. This provides the ability
|
|
||||||
to have a globally routable IPv6 address while only consuming IPv4
|
|
||||||
addresses as necessary or in a shared manner.</para>
|
|
||||||
<para>Application workloads affect the design of the underlying network
|
|
||||||
architecture. If a workload requires network-level redundancy, the
|
|
||||||
routing and switching architecture have to accommodate this. There
|
|
||||||
are differing methods for providing this that are dependent on the
|
|
||||||
selected network hardware, the performance of the hardware, and which
|
|
||||||
networking model you deploy. Examples include
|
|
||||||
Link aggregation (LAG) and Hot Standby Router Protocol (HSRP). Also
|
|
||||||
consider whether to deploy OpenStack Networking or
|
|
||||||
legacy networking (nova-network), and which plug-in to select for
|
|
||||||
OpenStack Networking. If using an external system, configure Networking
|
|
||||||
to run <glossterm baseform="Layer-2 network">layer 2</glossterm>
|
|
||||||
with a provider network configuration. For example, implement HSRP
|
|
||||||
to terminate layer-3 connectivity.</para>
|
|
||||||
<para>Depending on the workload, overlay networks may not be the best
|
|
||||||
solution. Where application network connections are
|
|
||||||
small, short lived, or bursty, running a dynamic overlay can generate
|
|
||||||
as much bandwidth as the packets it carries. It also can induce enough
|
|
||||||
latency to cause issues with certain applications. There is an impact
|
|
||||||
to the device generating the overlay which, in most installations,
|
|
||||||
is the hypervisor. This causes performance degradation on packet
|
|
||||||
per second and connection per second rates.</para>
|
|
||||||
<para>Overlays also come with a secondary option that may not be
|
|
||||||
appropriate to a specific workload. While all of them operate in full
|
|
||||||
mesh by default, there might be good reasons to disable this function
|
|
||||||
because it may cause excessive overhead for some workloads. Conversely,
|
|
||||||
other workloads operate without issue. For example, most web services
|
|
||||||
applications do not have major issues with a full mesh overlay network,
|
|
||||||
while some network monitoring tools or storage replication workloads
|
|
||||||
have performance issues with throughput or excessive broadcast
|
|
||||||
traffic.</para>
|
|
||||||
<para>Many people overlook an important design decision: the choice of
|
|
||||||
layer-3 protocols. While OpenStack was initially built with only IPv4
|
|
||||||
support, Networking now supports IPv6 and dual-stacked networks.
|
|
||||||
Some workloads are possible through the use of IPv6 and IPv6 to IPv4
|
|
||||||
reverse transition mechanisms such as NAT64 and DNS64 or
|
|
||||||
<glossterm>6to4</glossterm>.
|
|
||||||
This alters the requirements for any address plan as single-stacked and
|
|
||||||
transitional IPv6 deployments can alleviate the need for IPv4
|
|
||||||
addresses.</para>
|
|
||||||
<para>OpenStack has limited support for
|
|
||||||
dynamic routing, however there are a number of options available by
|
|
||||||
incorporating third party solutions to implement routing within the
|
|
||||||
cloud including network equipment, hardware nodes, and instances. Some
|
|
||||||
workloads perform well with nothing more than static routes and default
|
|
||||||
gateways configured at the layer-3 termination point. In most cases this
|
|
||||||
is sufficient, however some cases require the addition of at least one
|
|
||||||
type of dynamic routing protocol if not multiple protocols. Having a
|
|
||||||
form of interior gateway protocol (IGP) available to the instances
|
|
||||||
inside an OpenStack installation opens up the possibility of use cases
|
|
||||||
for anycast route injection for services that need to use it as a
|
|
||||||
geographic location or failover mechanism. Other applications may wish
|
|
||||||
to directly participate in a routing protocol, either as a passive
|
|
||||||
observer, as in the case of a looking glass, or as an active participant
|
|
||||||
in the form of a route reflector. Since an instance might have a large
|
|
||||||
amount of compute and memory resources, it is trivial to hold an entire
|
|
||||||
unpartitioned routing table and use it to provide services such as
|
|
||||||
network path visibility to other applications or as a monitoring
|
|
||||||
tool.</para>
|
|
||||||
<para>Path maximum transmission unit (MTU) failures are lesser known but
|
|
||||||
harder to diagnose. The MTU must be large enough to handle normal
|
|
||||||
traffic, overhead from an overlay network, and the desired layer-3
|
|
||||||
protocol. Adding externally built tunnels reduces the MTU packet size.
|
|
||||||
In this case, you must pay attention to the fully
|
|
||||||
calculated MTU size because some systems ignore or
|
|
||||||
drop path MTU discovery packets.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="tunables">
|
|
||||||
<title>Tunable networking components</title>
|
|
||||||
<para>Consider configurable networking components related to an
|
|
||||||
OpenStack architecture design when designing for network intensive
|
|
||||||
workloads that include MTU and QoS. Some workloads require a larger MTU
|
|
||||||
than normal due to the transfer of large blocks of data.
|
|
||||||
When providing network service for applications such as video
|
|
||||||
streaming or storage replication, we recommend that you configure
|
|
||||||
both OpenStack hardware nodes and the supporting network equipment
|
|
||||||
for jumbo frames where possible. This allows for better use of
|
|
||||||
available bandwidth. Configure jumbo frames
|
|
||||||
across the complete path the packets traverse. If one network
|
|
||||||
component is not capable of handling jumbo frames then the entire
|
|
||||||
path reverts to the default MTU.</para>
|
|
||||||
<para>Quality of Service (QoS) also has a great impact on network
|
|
||||||
intensive workloads as it provides instant service to packets which
|
|
||||||
have a higher priority due to the impact of poor
|
|
||||||
network performance. In applications such as Voice over IP (VoIP),
|
|
||||||
differentiated services code points are a near requirement for proper
|
|
||||||
operation. You can also use QoS in the opposite direction for mixed
|
|
||||||
workloads to prevent low priority but high bandwidth applications,
|
|
||||||
for example backup services, video conferencing, or file sharing,
|
|
||||||
from blocking bandwidth that is needed for the proper operation of
|
|
||||||
other workloads. It is possible to tag file storage traffic as a
|
|
||||||
lower class, such as best effort or scavenger, to allow the higher
|
|
||||||
priority traffic through. In cases where regions within a cloud might
|
|
||||||
be geographically distributed it may also be necessary to plan
|
|
||||||
accordingly to implement WAN optimization to combat latency or
|
|
||||||
packet loss.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,68 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="operational-considerations-networking-focus">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Operational considerations</title>
|
|
||||||
<para>Network-focused OpenStack clouds have a number of operational
|
|
||||||
considerations that influence the selected design, including:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Dynamic routing of static routes</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Service level agreements (SLAs)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Ownership of user management</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>An initial network consideration is the selection of a telecom
|
|
||||||
company or transit provider.</para>
|
|
||||||
<para>Make additional design decisions about monitoring and alarming.
|
|
||||||
This can be an internal responsibility or the responsibility of the
|
|
||||||
external provider. In the case of using an external provider, service
|
|
||||||
level agreements (SLAs) likely apply. In addition, other operational
|
|
||||||
considerations such as bandwidth, latency, and jitter can be part of an
|
|
||||||
SLA.</para>
|
|
||||||
<para>Consider the ability to upgrade the infrastructure. As demand for
|
|
||||||
network resources increases, operators add additional IP address blocks
|
|
||||||
and add additional bandwidth capacity. In addition, consider managing
|
|
||||||
hardware and software life cycle events, for example upgrades,
|
|
||||||
decommissioning, and outages, while avoiding service interruptions for
|
|
||||||
tenants.</para>
|
|
||||||
<para>Factor maintainability into the overall network design. This
|
|
||||||
includes the ability to manage and maintain IP addresses as well as the
|
|
||||||
use of overlay identifiers including VLAN tag IDs, GRE tunnel IDs, and
|
|
||||||
MPLS tags. As an example, if you need to change all of the IP
|
|
||||||
addresses on a network, a process known as renumbering, then the design
|
|
||||||
must support this function.</para>
|
|
||||||
<para>Address network-focused applications when considering certain
|
|
||||||
operational realities. For example, consider the impending exhaustion
|
|
||||||
of IPv4 addresses, the migration to IPv6, and the use of private
|
|
||||||
networks to segregate different types of traffic that an application
|
|
||||||
receives or generates. In the case of IPv4 to IPv6 migrations,
|
|
||||||
applications should follow best practices for storing IP addresses.
|
|
||||||
We recommend you avoid relying on IPv4 features that did not carry over
|
|
||||||
to the IPv6 protocol or have differences in implementation.</para>
|
|
||||||
<para>To segregate traffic, allow applications to create a private tenant
|
|
||||||
network for database and storage network traffic. Use a public network
|
|
||||||
for services that require direct client access from the internet. Upon
|
|
||||||
segregating the traffic, consider quality of service (QoS) and security
|
|
||||||
to ensure each network has the required level of service.</para>
|
|
||||||
<para>Finally, consider the routing of network traffic.
|
|
||||||
For some applications, develop a complex policy framework for
|
|
||||||
routing. To create a routing policy that satisfies business requirements,
|
|
||||||
consider the economic cost of transmitting traffic over expensive links
|
|
||||||
versus cheaper links, in addition to bandwidth, latency, and jitter
|
|
||||||
requirements.</para>
|
|
||||||
<para>Additionally, consider how to respond to network events. As an
|
|
||||||
example, how load transfers from one link to another during a
|
|
||||||
failure scenario could be a factor in the design. If you do not plan
|
|
||||||
network capacity correctly, failover traffic could overwhelm other ports
|
|
||||||
or network links and create a cascading failure scenario. In this case,
|
|
||||||
traffic that fails over to one link overwhelms that link and then moves
|
|
||||||
to the subsequent links until all network traffic stops.</para>
|
|
||||||
</section>
|
|
@ -1,209 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="prescriptive-example-large-scale-web-app">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Prescriptive examples</title>
|
|
||||||
<para>An organization designs a large-scale web application with cloud
|
|
||||||
principles in mind. The application scales
|
|
||||||
horizontally in a bursting fashion and generates a high
|
|
||||||
instance count. The application requires an SSL connection to
|
|
||||||
secure data and must not lose connection state to individual
|
|
||||||
servers.</para>
|
|
||||||
<para>The figure below depicts an example design for this workload.
|
|
||||||
In this example, a hardware load balancer provides SSL offload
|
|
||||||
functionality and connects
|
|
||||||
to tenant networks in order to reduce address consumption.
|
|
||||||
This load balancer links to the routing architecture as it
|
|
||||||
services the VIP for the application. The router and load
|
|
||||||
balancer use the GRE tunnel ID of the
|
|
||||||
application's tenant network and an IP address within
|
|
||||||
the tenant subnet but outside of the address pool. This is to
|
|
||||||
ensure that the load balancer can communicate with the
|
|
||||||
application's HTTP servers without requiring the consumption
|
|
||||||
of a public IP address.</para>
|
|
||||||
<para>Because sessions persist until closed, the routing and
|
|
||||||
switching architecture provides high availability.
|
|
||||||
Switches mesh to each hypervisor and each other, and
|
|
||||||
also provide an MLAG implementation to ensure that layer-2
|
|
||||||
connectivity does not fail. Routers use VRRP
|
|
||||||
and fully mesh with switches to ensure layer-3 connectivity.
|
|
||||||
Since GRE provides an overlay network, Networking is present
|
|
||||||
and uses the Open vSwitch agent in GRE tunnel
|
|
||||||
mode. This ensures all devices can reach all other devices and
|
|
||||||
that you can create tenant networks for private addressing
|
|
||||||
links to the load balancer.
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Network_Web_Services1.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject></para>
|
|
||||||
<para>A web service architecture has many options and optional
|
|
||||||
components. Due to this, it can fit into a large number of
|
|
||||||
other OpenStack designs. A few key components, however, need
|
|
||||||
to be in place to handle the nature of most web-scale
|
|
||||||
workloads. You require the following components:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Controller services (Image, Identity,
|
|
||||||
Networking and supporting services such as MariaDB and
|
|
||||||
RabbitMQ)</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Compute running KVM hypervisor</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>OpenStack Object Storage</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Orchestration service</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Telemetry service</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Beyond the normal Identity, Compute, Image service, and Object
|
|
||||||
Storage components, we recommend the Orchestration service
|
|
||||||
component to handle the proper scaling of workloads to adjust to
|
|
||||||
demand. Due to the requirement for auto-scaling,
|
|
||||||
the design includes the Telemetry service. Web services
|
|
||||||
tend to be bursty in load, have very defined peak and valley
|
|
||||||
usage patterns and, as a result, benefit from automatic scaling
|
|
||||||
of instances based upon traffic. At a network level, a split
|
|
||||||
network configuration works well with databases residing on
|
|
||||||
private tenant networks since these do not emit a large quantity
|
|
||||||
of broadcast traffic and may need to interconnect to some
|
|
||||||
databases for content.
|
|
||||||
</para>
|
|
||||||
<section xml:id="load-balancing">
|
|
||||||
<title>Load balancing</title>
|
|
||||||
<para>Load balancing spreads requests across multiple instances.
|
|
||||||
This workload scales well horizontally across large numbers of
|
|
||||||
instances. This enables instances to run without publicly
|
|
||||||
routed IP addresses and instead to rely on the load
|
|
||||||
balancer to provide a globally reachable service.
|
|
||||||
Many of these services do not require
|
|
||||||
direct server return. This aids in address planning and
|
|
||||||
utilization at scale since only the virtual IP (VIP) must be
|
|
||||||
public.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="overlay-networks">
|
|
||||||
<title>Overlay networks</title>
|
|
||||||
<para>
|
|
||||||
The overlay functionality design includes OpenStack Networking
|
|
||||||
in Open vSwitch GRE tunnel mode.
|
|
||||||
In this case, the layer-3 external routers pair with
|
|
||||||
VRRP, and switches pair with an implementation of
|
|
||||||
MLAG to ensure that you do not lose connectivity with
|
|
||||||
the upstream routing infrastructure.
|
|
||||||
</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="performance-tuning">
|
|
||||||
<title>Performance tuning</title>
|
|
||||||
<para>Network level tuning for this workload is minimal.
|
|
||||||
Quality-of-Service (QoS) applies to these workloads
|
|
||||||
for a middle ground Class Selector depending on existing
|
|
||||||
policies. It is higher than a best effort queue but lower
|
|
||||||
than an Expedited Forwarding or Assured Forwarding queue.
|
|
||||||
Since this type of application generates larger packets with
|
|
||||||
longer-lived connections, you can optimize bandwidth utilization
|
|
||||||
for long duration TCP. Normal bandwidth planning
|
|
||||||
applies here with regard to benchmarking a session's usage
|
|
||||||
multiplied by the expected number of concurrent sessions with
|
|
||||||
overhead.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="network-functions">
|
|
||||||
<title>Network functions</title>
|
|
||||||
<para>Network functions is a broad category but encompasses
|
|
||||||
workloads that support the rest of a system's network. These
|
|
||||||
workloads tend to consist of large amounts of small packets
|
|
||||||
that are very short lived, such as DNS queries or SNMP traps.
|
|
||||||
These messages need to arrive quickly and do not deal with
|
|
||||||
packet loss as there can be a very large volume of them. There
|
|
||||||
are a few extra considerations to take into account for this
|
|
||||||
type of workload and this can change a configuration all the
|
|
||||||
way to the hypervisor level. For an application that generates
|
|
||||||
10 TCP sessions per user with an average bandwidth of 512
|
|
||||||
kilobytes per second per flow and expected user count of ten
|
|
||||||
thousand concurrent users, the expected bandwidth plan is
|
|
||||||
approximately 410 gigabits per second (10 sessions × 512 kilobytes per second × 8 bits per byte × 10,000 users).</para>
|
|
||||||
<para>The supporting network for this type of configuration needs
|
|
||||||
to have a low latency and evenly distributed availability.
|
|
||||||
This workload benefits from having services local to the
|
|
||||||
consumers of the service. Use a multi-site approach as
|
|
||||||
well as deploying many copies of the application to handle
|
|
||||||
load as close as possible to consumers. Since these
|
|
||||||
applications function independently, they do not warrant
|
|
||||||
running overlays to interconnect tenant networks. Overlays
|
|
||||||
also have the drawback of performing poorly with rapid flow
|
|
||||||
setup and may incur too much overhead with large quantities of
|
|
||||||
small packets and therefore we do not recommend them.</para>
|
|
||||||
<para>QoS is desirable for some workloads to ensure delivery. DNS
|
|
||||||
has a major impact on the load times of other services and
|
|
||||||
needs to be reliable and provide rapid responses. Configure rules
|
|
||||||
in upstream devices to apply a higher Class
|
|
||||||
Selector to DNS to ensure faster delivery or a better spot in
|
|
||||||
queuing algorithms.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="cloud-storage">
|
|
||||||
<title>Cloud storage</title>
|
|
||||||
<para>Another common use case for OpenStack environments is providing
|
|
||||||
a cloud-based file storage and sharing service. You might
|
|
||||||
consider this a storage-focused use case, but its network-side
|
|
||||||
requirements make it a network-focused use case.</para>
|
|
||||||
<para>For example, consider a cloud backup application. This workload
|
|
||||||
has two specific behaviors that impact the network. Because this
|
|
||||||
workload is an externally-facing service and an
|
|
||||||
internally-replicating application, it has both <glossterm
|
|
||||||
baseform="north-south traffic">north-south</glossterm> and
|
|
||||||
<glossterm>east-west traffic</glossterm>
|
|
||||||
considerations:</para>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>north-south traffic</term>
|
|
||||||
<listitem>
|
|
||||||
<para>When a user uploads and stores content, that content moves
|
|
||||||
into the OpenStack installation. When users download this
|
|
||||||
content, the content moves out from the OpenStack
|
|
||||||
installation. Because this service operates primarily
|
|
||||||
as a backup, most of the traffic moves southbound into the
|
|
||||||
environment. In this situation, it benefits you to
|
|
||||||
configure a network to be asymmetrically downstream
|
|
||||||
because the traffic that enters the OpenStack installation
|
|
||||||
is greater than the traffic that leaves the installation.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>east-west traffic</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Likely to be fully symmetric. Because replication
|
|
||||||
originates from any node and might target multiple other
|
|
||||||
nodes algorithmically, it is less likely for this traffic
|
|
||||||
to have a larger volume in any specific direction. However,
|
|
||||||
this traffic might interfere with north-south traffic.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
<mediaobject>
|
|
||||||
<imageobject>
|
|
||||||
<imagedata contentwidth="4in"
|
|
||||||
fileref="../figures/Network_Cloud_Storage2.png"
|
|
||||||
/>
|
|
||||||
</imageobject>
|
|
||||||
</mediaobject>
|
|
||||||
<para>This application prioritizes the north-south traffic over
|
|
||||||
east-west traffic: the north-south traffic involves
|
|
||||||
customer-facing data.</para>
|
|
||||||
<para>The network design in this case is less dependent on
|
|
||||||
availability and more dependent on being able to handle high
|
|
||||||
bandwidth. As a direct result, it is beneficial to forgo
|
|
||||||
redundant links in favor of bonding those connections. This
|
|
||||||
increases available bandwidth. It is also beneficial to
|
|
||||||
configure all devices in the path, including OpenStack, to
|
|
||||||
generate and pass jumbo frames.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,462 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="technical-considerations-network-focus">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>Technical considerations</title>
|
|
||||||
<para>When you design an OpenStack network architecture, you must
|
|
||||||
consider layer-2 and layer-3 issues. Layer-2
|
|
||||||
decisions involve those made at the data-link layer, such as
|
|
||||||
the decision to use Ethernet versus Token Ring. Layer-3 decisions
|
|
||||||
involve those made about the protocol layer and the point when
|
|
||||||
IP comes into the picture. As an example, a completely
|
|
||||||
internal OpenStack network can exist at layer 2 and ignore
|
|
||||||
layer 3. In order for any traffic to go outside of
|
|
||||||
that cloud, to another network, or to the Internet, however, you must
|
|
||||||
use a layer-3 router or switch.</para>
|
|
||||||
<para>The past few years have seen two competing trends in
|
|
||||||
networking. One trend leans towards building data center network
|
|
||||||
architectures based on layer-2 networking. Another trend treats
|
|
||||||
the cloud environment essentially as a miniature version of the
|
|
||||||
Internet. This approach is radically different from the network
|
|
||||||
architecture approach in the staging environment:
|
|
||||||
the Internet only uses layer-3 routing rather than
|
|
||||||
layer-2 switching.</para>
|
|
||||||
<para>A network designed on layer-2 protocols has advantages over one
|
|
||||||
designed on layer-3 protocols. In spite of the difficulties of
|
|
||||||
using a bridge to perform the network role of a router, many
|
|
||||||
vendors, customers, and service providers choose to use Ethernet
|
|
||||||
in as many parts of their networks as possible. The benefits of
|
|
||||||
selecting a layer-2 design are:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Ethernet frames contain all the essentials for
|
|
||||||
networking. These include, but are not limited to,
|
|
||||||
globally unique source addresses, globally unique
|
|
||||||
destination addresses, and error control.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Ethernet frames can carry any kind of packet.
|
|
||||||
Networking at layer 2 is independent of the layer-3
|
|
||||||
protocol.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Adding more layers to the Ethernet frame only slows
|
|
||||||
the networking process down. This is known as 'nodal
|
|
||||||
processing delay'.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>You can add adjunct networking features, for
|
|
||||||
example class of service (CoS) or multicasting, to
|
|
||||||
Ethernet as readily as IP networks.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>VLANs are an easy mechanism for isolating
|
|
||||||
networks.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Most information starts and ends inside Ethernet frames.
|
|
||||||
Today this applies to data, voice (for example, VoIP), and
|
|
||||||
video (for example, web cameras). The concept is that, if you can
|
|
||||||
perform more of the end-to-end transfer of information from
|
|
||||||
a source to a destination in the form of Ethernet frames, the network
|
|
||||||
benefits more from the advantages of Ethernet.
|
|
||||||
Although it is not a substitute for IP networking, networking at
|
|
||||||
layer 2 can be a powerful adjunct to IP networking.</para>
|
|
||||||
<para>
|
|
||||||
Layer-2 Ethernet usage has these advantages over layer-3 IP
|
|
||||||
network usage:
|
|
||||||
</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Speed</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Reduced overhead of the IP hierarchy.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>No need to keep track of address configuration as systems
|
|
||||||
move around. Whereas the simplicity of layer-2
|
|
||||||
protocols might work well in a data center with hundreds
|
|
||||||
of physical machines, cloud data centers have the
|
|
||||||
additional burden of needing to keep track of all virtual
|
|
||||||
machine addresses and networks. In these data centers, it
|
|
||||||
is not uncommon for one physical node to support 30-40
|
|
||||||
instances.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<important>
|
|
||||||
<para>Networking at the frame level says nothing
|
|
||||||
about the presence or absence of IP addresses at the packet
|
|
||||||
level. Almost all ports, links, and devices on a network of
|
|
||||||
LAN switches still have IP addresses, as do all the source and
|
|
||||||
destination hosts. There are many reasons for the continued
|
|
||||||
need for IP addressing. The largest one is the need to manage
|
|
||||||
the network. A device or link without an IP address is usually
|
|
||||||
invisible to most management applications. Utilities including
|
|
||||||
remote access for diagnostics, file transfer of configurations
|
|
||||||
and software, and similar applications cannot run without IP
|
|
||||||
addresses as well as MAC addresses.</para>
|
|
||||||
</important>
|
|
||||||
<section xml:id="layer-2-arch-limitations">
|
|
||||||
<title>Layer-2 architecture limitations</title>
|
|
||||||
<para>Outside of the traditional data center the limitations of
|
|
||||||
layer-2 network architectures become more obvious.</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Number of VLANs is limited to 4096.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>The number of MACs stored in switch tables is
|
|
||||||
limited.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>You must accommodate the need to maintain a set of
|
|
||||||
layer-4 devices to handle traffic control.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>MLAG, often used for switch redundancy, is a
|
|
||||||
proprietary solution that does not scale beyond two
|
|
||||||
devices and forces vendor lock-in.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>It can be difficult to troubleshoot a network
|
|
||||||
without IP addresses and ICMP.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Configuring <glossterm
|
|
||||||
baseform="Address Resolution Protocol (ARP)">ARP</glossterm>
|
|
||||||
can be complicated on large layer-2 networks.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>All network devices need to be aware of all MACs,
|
|
||||||
even instance MACs, so there is constant churn in MAC
|
|
||||||
tables and network state changes as instances start and
|
|
||||||
stop.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Migrating MACs (instance migration) to different
|
|
||||||
physical locations is a potential problem if you do not
|
|
||||||
set ARP table timeouts properly.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>It is important to know that layer 2 has a very limited set
|
|
||||||
of network management tools. It is very difficult to control
|
|
||||||
traffic, as it does not have mechanisms to manage the network
|
|
||||||
or shape the traffic, and network troubleshooting is very
|
|
||||||
difficult. One reason for this difficulty is that network devices
|
|
||||||
have no IP addresses. As a result, there is no reasonable way
|
|
||||||
to check network delay in a layer-2 network.</para>
|
|
||||||
<para>On large layer-2 networks, configuring ARP learning can also
|
|
||||||
be complicated. The setting for the MAC address timer on
|
|
||||||
switches is critical and, if set incorrectly, can cause
|
|
||||||
significant performance problems. As an example, the Cisco
|
|
||||||
default MAC address timer is extremely long. Migrating MACs to
|
|
||||||
different physical locations to support instance migration can
|
|
||||||
be a significant problem. In this case, the network
|
|
||||||
information maintained in the switches could be out of sync
|
|
||||||
with the new location of the instance.</para>
|
|
||||||
<para>In a layer-2 network, all devices are aware of all MACs,
|
|
||||||
even those that belong to instances. The network state
|
|
||||||
information in the backbone changes whenever an instance starts
|
|
||||||
or stops. As a result, there is far too much churn in
|
|
||||||
the MAC tables on the backbone switches.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="layer-3-arch-advantages">
|
|
||||||
<title>Layer-3 architecture advantages</title>
|
|
||||||
<para>In the layer-3 case, there is no churn in the routing tables
|
|
||||||
due to instances starting and stopping. The only time there
|
|
||||||
would be a routing state change is in the case of a Top
|
|
||||||
of Rack (ToR) switch failure or a link failure in the backbone
|
|
||||||
itself. Other advantages of using a layer-3 architecture
|
|
||||||
include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Layer-3 networks provide the same level of
|
|
||||||
resiliency and scalability as the Internet.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Controlling traffic with routing metrics is
|
|
||||||
straightforward.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>You can configure layer 3 to use <glossterm
|
|
||||||
baseform="Border Gateway Protocol (BGP)">BGP</glossterm>
|
|
||||||
confederation for scalability so core routers have state
|
|
||||||
proportional to the number of racks, not to the number of
|
|
||||||
servers or instances.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Routing takes instance MAC and IP addresses
|
|
||||||
out of the network core, reducing state churn. Routing
|
|
||||||
state changes only occur in the case of a ToR switch
|
|
||||||
failure or backbone link failure.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>There are a variety of well-tested tools, for
|
|
||||||
example ICMP, to monitor and manage traffic.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Layer-3 architectures enable the use of Quality
|
|
||||||
of Service (QoS) to manage network performance.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<section xml:id="layer-3-arch-limitations">
|
|
||||||
<title>Layer-3 architecture limitations</title>
|
|
||||||
<para>The main limitation of layer 3 is that there is no built-in
|
|
||||||
isolation mechanism comparable to the VLANs in layer-2
|
|
||||||
networks. Furthermore, the hierarchical nature of IP addresses
|
|
||||||
means that an instance is on the same subnet as its
|
|
||||||
physical host. This means that you cannot migrate it outside
|
|
||||||
of the subnet easily. For these reasons, network
|
|
||||||
virtualization needs to use IP <glossterm>encapsulation</glossterm>
|
|
||||||
and software at the end hosts for isolation and the separation of
|
|
||||||
the addressing in the virtual layer from the addressing in the
|
|
||||||
physical layer. Other potential disadvantages of layer 3
|
|
||||||
include the need to design an IP addressing scheme rather than
|
|
||||||
relying on the switches to keep track of the MAC
|
|
||||||
addresses automatically and to configure the interior gateway routing
|
|
||||||
protocol in the switches.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
||||||
<section xml:id="network-recommendations-overview">
|
|
||||||
<title>Network recommendations overview</title>
|
|
||||||
<para>OpenStack has complex networking requirements for several
|
|
||||||
reasons. Many components interact at different levels of the
|
|
||||||
system stack, which adds complexity. Data flows are complex.
|
|
||||||
Data in an OpenStack cloud moves both between instances across
|
|
||||||
the network (also known as East-West), as well as in and out
|
|
||||||
of the system (also known as North-South). Physical server
|
|
||||||
nodes have network requirements that are independent of instance
|
|
||||||
network requirements, which you must isolate from the core
|
|
||||||
network to account for scalability. We recommend
|
|
||||||
functionally separating the networks for security purposes and
|
|
||||||
tuning performance through traffic shaping.</para>
|
|
||||||
<para>You must consider a number of important general technical
|
|
||||||
and business factors when planning and
|
|
||||||
designing an OpenStack network. They include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement for vendor independence. To avoid
|
|
||||||
hardware or software vendor lock-in, the design should
|
|
||||||
not rely on specific features of a vendor's router or
|
|
||||||
switch.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement to massively scale the ecosystem to
|
|
||||||
support millions of end users.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement to support indeterminate platforms and
|
|
||||||
applications.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement to design for cost-efficient
|
|
||||||
operations to take advantage of massive scale.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement to ensure that there is no single
|
|
||||||
point of failure in the cloud ecosystem.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement for high availability architecture to
|
|
||||||
meet customer SLA requirements.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement to be tolerant of rack-level
|
|
||||||
failure.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>A requirement to maximize flexibility to architect
|
|
||||||
future production environments.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<para>Bearing in mind these considerations, we recommend the following:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Layer-3 designs are preferable to layer-2
|
|
||||||
architectures.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Design a dense multi-path network core to support
|
|
||||||
multi-directional scaling and flexibility.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Use hierarchical addressing because it is the only
|
|
||||||
viable option to scale the network ecosystem.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Use virtual networking to isolate instance service
|
|
||||||
network traffic from the management and internal
|
|
||||||
network traffic.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Isolate virtual networks using encapsulation
|
|
||||||
technologies.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Use traffic shaping for performance tuning.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Use eBGP to connect to the Internet up-link.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Use iBGP to flatten the internal traffic on the
|
|
||||||
layer-3 mesh.</para>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<para>Determine the most effective configuration for the block
|
|
||||||
storage network.</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist></section>
|
|
||||||
<section xml:id="additional-considerations-network-focus">
|
|
||||||
<title>Additional considerations</title>
|
|
||||||
<para>There are several further considerations when designing a
|
|
||||||
network-focused OpenStack cloud.</para>
|
|
||||||
<section xml:id="openstack-networking-versus-nova-network">
|
|
||||||
<title>OpenStack Networking versus legacy networking (nova-network)
|
|
||||||
considerations</title>
|
|
||||||
<para>Selecting the type of networking technology to implement
|
|
||||||
depends on many factors. OpenStack Networking (neutron) and
|
|
||||||
legacy networking (nova-network) both have their advantages and
|
|
||||||
disadvantages. They are both valid and supported options that fit
|
|
||||||
different use cases:</para>
|
|
||||||
<informaltable rules="all">
|
|
||||||
<col width="40%" />
|
|
||||||
<col width="60%" />
|
|
||||||
<thead>
|
|
||||||
<tr><th>Legacy networking (nova-network)</th>
|
|
||||||
<th>OpenStack Networking</th></tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>Simple, single agent</td>
|
|
||||||
<td>Complex, multiple agents</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>More mature, established</td>
|
|
||||||
<td>Newer, maturing</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Flat or VLAN</td>
|
|
||||||
<td>Flat, VLAN, Overlays, L2-L3, SDN</td></tr>
|
|
||||||
<tr>
|
|
||||||
<td>No plug-in support</td>
|
|
||||||
<td>Plug-in support for 3rd parties</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Scales well</td>
|
|
||||||
<td>Scaling requires 3rd party plug-ins</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>No multi-tier topologies</td>
|
|
||||||
<td>Multi-tier topologies</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</informaltable>
|
|
||||||
</section>
|
|
||||||
<section xml:id="redundant-networking-tor-switch-ha">
|
|
||||||
<title>Redundant networking: ToR switch high availability
|
|
||||||
risk analysis</title>
|
|
||||||
<para>A technical consideration of networking is the idea that
|
|
||||||
you should install switching gear in a data center
|
|
||||||
with backup switches in case of hardware failure.</para>
|
|
||||||
<para>Research indicates the mean time between failures (MTBF) on switches
|
|
||||||
is between 100,000 and 200,000 hours. This number is dependent
|
|
||||||
on the ambient temperature of the switch in the data
|
|
||||||
center. When properly cooled and maintained, this translates to
|
|
||||||
between 11 and 22 years before failure. Even in the worst case
|
|
||||||
of poor ventilation and high ambient temperatures in the data
|
|
||||||
center, the MTBF is still 2-3 years. See <link
|
|
||||||
xlink:href="http://www.garrettcom.com/techsupport/papers/ethernet_switch_reliability.pdf">http://www.garrettcom.com/techsupport/papers/ethernet_switch_reliability.pdf</link>
|
|
||||||
for further information.</para>
|
|
||||||
<para>In most cases, it is much more economical to use a
|
|
||||||
single switch with a small pool of spare switches to replace
|
|
||||||
failed units than it is to outfit an entire data center with
|
|
||||||
redundant switches. Applications should tolerate rack-level
|
|
||||||
outages without affecting normal
|
|
||||||
operations, since network and compute resources are easily
|
|
||||||
provisioned and plentiful.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="preparing-for-future-ipv6-support">
|
|
||||||
<title>Preparing for the future: IPv6 support</title>
|
|
||||||
<para>One of the most important networking topics today is the
|
|
||||||
impending exhaustion of IPv4 addresses. In early 2014, ICANN
|
|
||||||
announced that they started allocating the final IPv4 address
|
|
||||||
blocks to the Regional Internet Registries (<link
|
|
||||||
xlink:href="http://www.internetsociety.org/deploy360/blog/2014/05/goodbye-ipv4-iana-starts-allocating-final-address-blocks/">http://www.internetsociety.org/deploy360/blog/2014/05/goodbye-ipv4-iana-starts-allocating-final-address-blocks/</link>).
|
|
||||||
This means the IPv4 address space is close to being fully
|
|
||||||
allocated. As a result, it will soon become difficult to
|
|
||||||
allocate more IPv4 addresses to an application that has
|
|
||||||
experienced growth, or that you expect to scale out, due to the lack
|
|
||||||
of unallocated IPv4 address blocks.</para>
|
|
||||||
<para>For network-focused applications, the future is the IPv6
|
|
||||||
protocol. IPv6 increases the address space significantly,
|
|
||||||
fixes long-standing issues in the IPv4 protocol, and will
|
|
||||||
become essential for network-focused applications in the
|
|
||||||
future.</para>
|
|
||||||
<para>OpenStack Networking supports IPv6 when configured to take
|
|
||||||
advantage of it. To enable IPv6, create an IPv6 subnet in
|
|
||||||
Networking and use IPv6 prefixes when creating security
|
|
||||||
groups.</para></section>
|
|
||||||
<section xml:id="asymmetric-links">
|
|
||||||
<title>Asymmetric links</title>
|
|
||||||
<para>When designing a network architecture, the traffic patterns
|
|
||||||
of an application heavily influence the allocation of
|
|
||||||
total bandwidth and the number of links that you use to send
|
|
||||||
and receive traffic. Applications that provide file storage
|
|
||||||
for customers allocate bandwidth and links to favor
|
|
||||||
incoming traffic, whereas video streaming applications
|
|
||||||
allocate bandwidth and links to favor outgoing traffic.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="performance-network-focus">
|
|
||||||
<title>Performance</title>
|
|
||||||
<para>It is important to analyze the applications' tolerance for
|
|
||||||
latency and jitter when designing an environment to support
|
|
||||||
network-focused applications. Certain applications, for
|
|
||||||
example VoIP, are less tolerant of latency and jitter. Where
|
|
||||||
latency and jitter are concerned, certain applications may
|
|
||||||
require tuning of QoS parameters and network device queues to
|
|
||||||
ensure that they queue for transmission immediately or
|
|
||||||
guarantee minimum bandwidth. Since OpenStack currently does
|
|
||||||
not support these functions, consider carefully your selected
|
|
||||||
network plug-in.</para>
|
|
||||||
<para>The location of a service may also impact the application or
|
|
||||||
consumer experience. If an application serves
|
|
||||||
differing content to different users, it must properly direct
|
|
||||||
connections to those specific locations. Where appropriate,
|
|
||||||
use a multi-site installation for these situations.</para>
|
|
||||||
<para>You can implement networking in two separate
|
|
||||||
ways. Legacy networking (nova-network) provides a flat DHCP network
|
|
||||||
with a single broadcast domain. This implementation does not
|
|
||||||
support tenant isolation networks or advanced plug-ins, but it
|
|
||||||
is currently the only way to implement a distributed layer-3
|
|
||||||
agent using the multi_host configuration.
|
|
||||||
OpenStack Networking (neutron) is the official networking implementation
|
|
||||||
and provides a pluggable architecture that supports a large
|
|
||||||
variety of network methods. Some of these include a layer-2
|
|
||||||
only provider network model, external device plug-ins, or even
|
|
||||||
OpenFlow controllers.</para>
|
|
||||||
<para>Networking at large scales becomes a set of boundary
|
|
||||||
questions. The determination of how large a layer-2 domain
|
|
||||||
must be is based on the number of nodes within the domain
|
|
||||||
and the amount of broadcast traffic that passes between
|
|
||||||
instances. Breaking layer-2 boundaries may require the
|
|
||||||
implementation of overlay networks and tunnels. This decision
|
|
||||||
is a balancing act between the need for a smaller overhead or
|
|
||||||
a need for a smaller domain.</para>
|
|
||||||
<para>When selecting network devices, be aware that making this
|
|
||||||
decision based on the greatest port density often comes with a
|
|
||||||
drawback. Aggregation switches and routers have not all kept
|
|
||||||
pace with Top of Rack switches and may induce bottlenecks on
|
|
||||||
north-south traffic. As a result, it may be possible for
|
|
||||||
massive amounts of downstream network utilization to impact
|
|
||||||
upstream network devices, impacting service to the cloud.
|
|
||||||
Since OpenStack does not currently provide a mechanism for
|
|
||||||
traffic shaping or rate limiting, it is necessary to implement
|
|
||||||
these features at the network hardware level.</para>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,104 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<section xmlns="http://docbook.org/ns/docbook"
|
|
||||||
xmlns:xi="http://www.w3.org/2001/XInclude"
|
|
||||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
|
||||||
version="5.0"
|
|
||||||
xml:id="user-requirements-network-focus">
|
|
||||||
<?dbhtml stop-chunking?>
|
|
||||||
<title>User requirements</title>
|
|
||||||
<para>Network-focused architectures vary from the general-purpose
|
|
||||||
architecture designs. Certain network-intensive applications influence
|
|
||||||
these architectures. Some of the business requirements that influence
|
|
||||||
the design include:</para>
|
|
||||||
<itemizedlist>
|
|
||||||
<listitem>
|
|
||||||
<para>Network latency through slow page loads, degraded video
|
|
||||||
streams, and low-quality VoIP sessions impacts the user
|
|
||||||
experience. Users are often not aware of how network design and
|
|
||||||
architecture affect their experiences. Both enterprise customers
|
|
||||||
and end-users rely on the network for delivery of an application.
|
|
||||||
Network performance problems can result in a negative experience
|
|
||||||
for the end-user, as well as productivity and economic loss.
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
<section xml:id="high-availability-issues-network-focus">
|
|
||||||
<title>High availability issues</title>
|
|
||||||
<para>Depending on the application and use case, network-intensive
|
|
||||||
OpenStack installations can have high availability requirements.
|
|
||||||
Financial transaction systems have a much higher requirement for high
|
|
||||||
availability than a development application. Use network availability
|
|
||||||
technologies, for example quality of service (QoS), to improve the
|
|
||||||
network performance of sensitive applications such as VoIP and video
|
|
||||||
streaming.</para>
|
|
||||||
<para>High performance systems have SLA requirements for a minimum
|
|
||||||
QoS with regard to guaranteed uptime, latency, and bandwidth. The level
|
|
||||||
of the SLA can have a significant impact on the network architecture and
|
|
||||||
requirements for redundancy in the systems.</para>
|
|
||||||
</section>
|
|
||||||
<section xml:id="risks-network-focus">
|
|
||||||
<title>Risks</title>
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Network misconfigurations</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Configuring incorrect IP addresses, VLANs, and routers
|
|
||||||
can cause outages to areas of the network or, in the worst-case
|
|
||||||
scenario, the entire cloud infrastructure. Automate network
|
|
||||||
configurations to minimize the opportunity for operator error
|
|
||||||
as it can cause disruptive problems.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Capacity planning</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Cloud networks require management for capacity and growth
|
|
||||||
over time. Capacity planning includes the purchase of network
|
|
||||||
circuits and hardware that can potentially have lead times
|
|
||||||
measured in months or years.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Network tuning</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Configure cloud networks to minimize link loss, packet loss,
|
|
||||||
packet storms, broadcast storms, and loops.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Single Point Of Failure (SPOF)</term>
|
|
||||||
<listitem>
|
|
||||||
<para>Consider high availability at the physical and environmental
|
|
||||||
layers. If there is a single point of failure due to only one
|
|
||||||
upstream link, or only one power supply, an outage can become
|
|
||||||
unavoidable.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Complexity</term>
|
|
||||||
<listitem>
|
|
||||||
<para>An overly complex network design can be difficult to
|
|
||||||
maintain and troubleshoot. While device-level configuration
|
|
||||||
can ease maintenance concerns and automated tools can handle
|
|
||||||
overlay networks, avoid or document non-traditional interconnects
|
|
||||||
between functions and specialized hardware to prevent
|
|
||||||
outages.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term>Non-standard features</term>
|
|
||||||
<listitem>
|
|
||||||
<para>There are additional risks that arise from configuring the
|
|
||||||
cloud network to take advantage of vendor-specific features.
|
|
||||||
One example is multi-link aggregation (MLAG) used to provide
|
|
||||||
redundancy at the aggregator switch level of the network. MLAG
|
|
||||||
is not a standard and, as a result, each vendor has its own
|
|
||||||
proprietary implementation of the feature. MLAG architectures
|
|
||||||
are not interoperable across switch vendors, which leads to
|
|
||||||
vendor lock-in, and can delay or even prevent upgrading
|
|
||||||
components.</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
@ -1,83 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
|
||||||
<parent>
|
|
||||||
<groupId>org.openstack.docs</groupId>
|
|
||||||
<artifactId>parent-pom</artifactId>
|
|
||||||
<version>1.0.0-SNAPSHOT</version>
|
|
||||||
<relativePath>../pom.xml</relativePath>
|
|
||||||
</parent>
|
|
||||||
<modelVersion>4.0.0</modelVersion>
|
|
||||||
<artifactId>openstack-arch-design</artifactId>
|
|
||||||
<packaging>jar</packaging>
|
|
||||||
<name>OpenStack Architecture Design Guide</name>
|
|
||||||
<properties>
|
|
||||||
<!-- This is set by Jenkins according to the branch. -->
|
|
||||||
<release.path.name></release.path.name>
|
|
||||||
<comments.enabled>0</comments.enabled>
|
|
||||||
</properties>
|
|
||||||
<!-- ################################################ -->
|
|
||||||
<!-- USE "mvn clean generate-sources" to run this POM -->
|
|
||||||
<!-- ################################################ -->
|
|
||||||
<build>
|
|
||||||
<plugins>
|
|
||||||
<plugin>
|
|
||||||
<groupId>com.rackspace.cloud.api</groupId>
|
|
||||||
<artifactId>clouddocs-maven-plugin</artifactId>
|
|
||||||
<!-- version set in ../pom.xml -->
|
|
||||||
<executions>
|
|
||||||
<execution>
|
|
||||||
<id>generate-webhelp</id>
|
|
||||||
<goals>
|
|
||||||
<goal>generate-webhelp</goal>
|
|
||||||
</goals>
|
|
||||||
<phase>generate-sources</phase>
|
|
||||||
<configuration>
|
|
||||||
<!-- These parameters only apply to webhelp -->
|
|
||||||
<enableDisqus>0</enableDisqus>
|
|
||||||
<disqusShortname>openstack-arch-design</disqusShortname>
|
|
||||||
<enableGoogleAnalytics>1</enableGoogleAnalytics>
|
|
||||||
<googleAnalyticsId>UA-17511903-1</googleAnalyticsId>
|
|
||||||
<generateToc>
|
|
||||||
appendix toc,title
|
|
||||||
article/appendix nop
|
|
||||||
article toc,title
|
|
||||||
book toc,title,figure,table,example,equation
|
|
||||||
chapter toc,title
|
|
||||||
section toc
|
|
||||||
part toc,title
|
|
||||||
qandadiv toc
|
|
||||||
qandaset toc
|
|
||||||
reference toc,title
|
|
||||||
set toc,title
|
|
||||||
</generateToc>
|
|
||||||
<!-- The following elements set the autonumbering of sections in the output: chapter numbers but no numbered sections -->
|
|
||||||
<sectionAutolabel>0</sectionAutolabel>
|
|
||||||
<tocSectionDepth>1</tocSectionDepth>
|
|
||||||
<sectionLabelIncludesComponentLabel>0</sectionLabelIncludesComponentLabel>
|
|
||||||
<webhelpDirname>arch-design</webhelpDirname>
|
|
||||||
<pdfFilenameBase>arch-design</pdfFilenameBase>
|
|
||||||
<pageWidth>7.44in</pageWidth>
|
|
||||||
<pageHeight>9.68in</pageHeight>
|
|
||||||
<doubleSided>1</doubleSided>
|
|
||||||
<omitCover>1</omitCover>
|
|
||||||
</configuration>
|
|
||||||
</execution>
|
|
||||||
</executions>
|
|
||||||
<configuration>
|
|
||||||
<!-- These parameters apply to pdf and webhelp -->
|
|
||||||
<xincludeSupported>true</xincludeSupported>
|
|
||||||
<sourceDirectory>.</sourceDirectory>
|
|
||||||
<includes>
|
|
||||||
bk-openstack-arch-design.xml
|
|
||||||
</includes>
|
|
||||||
<canonicalUrlBase>http://docs.openstack.org/openstack-arch-design/content</canonicalUrlBase>
|
|
||||||
<glossaryCollection>${basedir}/../glossary/glossary-terms.xml</glossaryCollection>
|
|
||||||
<branding>openstack</branding>
|
|
||||||
<formalProcedures>0</formalProcedures>
|
|
||||||
</configuration>
|
|
||||||
</plugin>
|
|
||||||
</plugins>
|
|
||||||
</build>
|
|
||||||
</project>
|
|